framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,16,2,0,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,16,4,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,16,8,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,16,2,0,0.014645333091417948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,16,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,16,16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,16,1,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,16,1,0,0.014778666198253632
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,16,64,0,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,16,4,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,16,8,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,16,16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,32,4,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,16,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,16,64,0,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,32,2,0,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,32,1,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,32,8,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,32,16,0,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,32,32,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,32,64,0,0.014592000593741735
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,32,1,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,32,2,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,32,8,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,32,32,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,32,16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,32,64,0,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,32,4,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,64,1,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,64,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,64,4,0,0.019029332945744198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,64,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,64,8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,64,64,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,64,1,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,64,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,64,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,64,16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,64,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,64,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,128,1,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,128,2,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,128,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,128,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,128,4,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,128,1,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,128,4,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,128,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,128,8,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,128,2,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,128,32,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,128,16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,128,32,0,0.016597333053747814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,128,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,256,1,0,0.022517333428064983
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,256,4,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,256,8,0,0.021146667500336964
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,256,2,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,256,32,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,256,16,0,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,256,1,0,0.021194666624069214
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,256,64,0,0.02048533285657565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,256,2,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,256,4,0,0.021130666136741638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,256,16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,256,8,0,0.02117866774400075
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,256,64,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,512,4,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,512,1,0,0.031034665803114574
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,512,8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,512,16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,512,2,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,512,32,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,512,64,0,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,512,1,0,0.03072533259789149
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,512,2,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,512,8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,512,16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,512,64,0,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,512,4,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,1024,16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,1024,8,0,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,1024,1,0,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,1024,32,0,0.029343999922275543
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,1024,4,0,0.03311999887228012
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,1024,2,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,1024,64,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,1024,4,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,1024,2,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,1024,8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,1024,16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,1024,64,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,1024,1,0,0.062090665102005005
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,1024,32,0,0.03036266565322876
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,1536,4,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,1536,2,0,0.06038933495680491
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,1536,1,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,1536,8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,1536,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,1536,64,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,1536,16,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,1536,16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,1536,2,0,0.062133332093556724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,1536,4,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,1536,1,0,0.09795733292897542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,1536,8,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,1536,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,1536,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,2048,8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,2048,4,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,2048,16,0,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,2048,32,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,2048,64,0,0.044026667873064675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,2048,2,0,0.085999995470047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,2048,1,0,0.134853333234787
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,2048,16,0,0.04538666705290476
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,2048,32,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,2048,8,0,0.04679466784000397
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,2048,64,0,0.044010668992996216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,2048,4,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,2048,2,0,0.08567999800046285
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,2048,1,0,0.13551466663678488
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,3072,32,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,3072,64,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,3072,8,0,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,3072,16,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,3072,4,0,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,3072,2,0,0.14149866501490274
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,3072,1,0,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,3072,64,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,3072,16,0,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,3072,32,0,0.058037335673967995
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,3072,8,0,0.062133332093556724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,3072,4,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,3072,2,0,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,3072,1,0,0.23278933763504028
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,4096,64,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,4096,32,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,4096,16,0,0.07442666590213776
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,4096,8,0,0.07645333309968312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,4096,4,0,0.13431466619173685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,4096,2,0,0.20617065827051798
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,4096,32,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,4096,16,0,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,4096,8,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,4096,4,0,0.1346560021241506
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,4096,64,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,4096,1,0,0.35516266028086346
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,4096,2,0,0.20546666781107584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,6144,32,0,0.10070400436719258
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,6144,64,0,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,6144,16,0,0.10274666547775269
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,4096,1,0,0.35549334685007733
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,6144,8,0,0.14403733611106873
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,6144,4,0,0.22835199038187662
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,6144,16,0,0.10309867064158122
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,6144,8,0,0.1431893308957418
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,6144,2,0,0.37495466073354083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,6144,32,0,0.0993386705716451
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,6144,4,0,0.2290346622467041
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,6144,64,0,0.0986346701780955
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,6144,2,0,0.3756320079167684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,6144,1,0,0.694271961847941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,8192,16,0,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,8192,32,0,0.12935466567675272
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,8192,64,0,0.1269706686337789
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,8192,8,0,0.22223466634750366
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,8192,4,0,0.34426132837931317
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,6144,1,0,0.6877973079681396
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,8192,16,0,0.1378986636797587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,8192,8,0,0.22153067588806152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,8192,2,0,0.6045013268788656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,8192,32,0,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,8192,64,0,0.12662933270136514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,8192,4,0,0.34594134489695233
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,8192,2,0,0.6017706791559855
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,10240,16,0,0.18312533696492514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,10240,32,0,0.15495999654134116
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,8192,1,0,1.1289599736531575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,10240,8,0,0.3049866755803426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,10240,4,0,0.4875946839650472
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,10240,64,0,0.15411200126012167
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,8192,1,0,1.1234986782073975
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,10240,16,0,0.18141865730285645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,10240,2,0,0.8881493409474691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,10240,32,0,0.15633599956830344
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,10240,8,0,0.3036160071690877
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,10240,4,0,0.4882719914118449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,10240,64,0,0.1534293293952942
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,12288,16,0,0.24951465924580893
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,10240,2,0,0.8813280264536539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,10240,1,0,1.6614400545756023
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,12288,8,0,0.3935573498408
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,12288,4,0,0.6601440111796061
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,12288,32,0,0.1867093245188395
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,12288,64,0,0.1803893248240153
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,12288,2,0,1.2107093334197998
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,10240,1,0,1.67304531733195
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,12288,16,0,0.2481600046157837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,12288,8,0,0.39492801825205487
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,12288,32,0,0.187391996383667
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,12288,64,0,0.18107734123865762
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,12288,4,0,0.6611520051956177
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,16384,16,0,0.3894666830698649
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,12288,2,0,1.2120746771494548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,16384,8,0,0.6174773375193278
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,1,16384,64,0,0.23620800177256265
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,16384,32,0,0.2678026755650838
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,12288,1,0,2.3432532946268716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,16384,4,0,1.086634635925293
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,16384,16,0,0.3875733216603597
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,16384,8,0,0.6208693186442057
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,12288,1,0,2.307765324910482
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,16384,2,0,2.0712052981058755
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,16384,32,0,0.26709334055582684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,16,1,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,1,16384,64,0,0.23892800013224283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,16384,4,0,1.0883413155873616
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,16,4,0,0.014901333798964819
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,16,2,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,16,8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,16,64,0,0.01533866673707962
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,16,32,0,0.01571200042963028
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,16,16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,16,1,0,0.01498666654030482
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,16,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,16,4,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,16,16,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,16,8,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,16,64,0,0.017605333278576534
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,16,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,16384,2,0,2.040837287902832
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,32,1,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,32,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,32,8,0,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,32,4,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,32,64,0,0.015354666858911514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,32,16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,32,32,0,0.015040000279744467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,32,2,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,32,1,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,32,4,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,32,8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,32,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,32,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,32,16,0,0.02314666658639908
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,64,1,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,16384,1,0,3.975525220235189
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,64,2,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,64,4,0,0.016389333953460056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,64,8,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,64,16,0,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,64,64,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,64,1,0,0.019141333798567455
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,64,4,0,0.016943999876578648
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,64,2,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,64,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,64,16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,64,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,64,64,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,128,2,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,128,8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,128,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,128,1,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,128,16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,16384,1,0,4.0960267384847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,128,32,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,128,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,128,2,0,0.018751999984184902
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,128,1,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,128,4,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,128,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,128,16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,128,32,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,128,64,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,256,1,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,256,2,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,256,4,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,256,8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,256,32,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,256,64,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,256,16,0,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,256,1,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,256,4,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,256,8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,256,2,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,256,64,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,256,16,0,0.020784000555674236
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,512,4,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,512,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,512,16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,512,1,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,512,64,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,512,2,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,512,32,0,0.023232000569502514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,512,1,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,512,2,0,0.030058667063713074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,512,4,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,512,16,0,0.024901332954565685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,512,8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,512,32,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,1024,16,0,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,1024,4,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,1024,8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,1024,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,1024,2,0,0.06039999922116598
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,1024,64,0,0.03072533259789149
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,1024,1,0,0.10001066327095032
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,1024,4,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,1024,8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,1024,32,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,1024,64,0,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,1024,2,0,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,1024,16,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,1024,1,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,1536,32,0,0.038906666139761605
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,1536,16,0,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,1536,8,0,0.04330133398373922
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,1536,4,0,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,1536,2,0,0.09761599699656169
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,1536,64,0,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,1536,1,0,0.1628266672293345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,1536,16,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,1536,8,0,0.04301866888999939
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,1536,32,0,0.03755199909210205
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,1536,64,0,0.037178667883078255
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,1536,4,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,1536,2,0,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,2048,8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,1536,1,0,0.16316266854604086
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,2048,32,0,0.04538666705290476
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,2048,16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,2048,64,0,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,2048,4,0,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,2048,2,0,0.13517333070437113
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,2048,32,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,2048,8,0,0.050885334610939026
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,2048,64,0,0.04472533365090688
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,2048,1,0,0.23378666241963705
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,2048,4,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,2048,16,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,2048,2,0,0.13499200344085693
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,3072,32,0,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,3072,16,0,0.0634933312733968
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,3072,8,0,0.08942400415738423
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,2048,1,0,0.23517332474390665
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,3072,4,0,0.14096533258756003
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,3072,64,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,3072,2,0,0.23279466231664023
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,3072,16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,3072,8,0,0.0897706647713979
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,3072,64,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,3072,4,0,0.14045866330464682
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,3072,32,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,3072,2,0,0.23278933763504028
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,3072,1,0,0.41899200280507404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,4096,8,0,0.13569066921869913
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,4096,32,0,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,4096,64,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,4096,16,0,0.07817066709200542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,4096,4,0,0.20855466524759927
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,3072,1,0,0.420693318049113
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,4096,2,0,0.3561600049336751
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,4096,16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,4096,8,0,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,4096,32,0,0.07477866609891255
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,4096,4,0,0.20924800634384155
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,4096,64,0,0.07234666744867961
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,4096,2,0,0.35652267932891846
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,4096,1,0,0.6645706494649252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,6144,32,0,0.10820266604423523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,6144,8,0,0.23040000597635904
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,6144,64,0,0.10105599959691365
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,6144,16,0,0.1460906664530436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,6144,4,0,0.37905065218607586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,4096,1,0,0.6662773291269938
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,6144,16,0,0.1460906664530436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,6144,32,0,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,6144,2,0,0.6952906449635824
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,6144,8,0,0.2290560007095337
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,6144,64,0,0.10069866975148518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,6144,4,0,0.377674659093221
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,8192,16,0,0.22459733486175537
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,8192,8,0,0.3476533492406209
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,6144,2,0,0.6908586819966634
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,8192,32,0,0.14848533272743225
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,8192,64,0,0.13192533453305563
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,8192,4,0,0.6041599909464518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,6144,1,0,1.3346187273661296
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,8192,16,0,0.22323199113210043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,8192,8,0,0.3476426601409912
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,8192,2,0,1.1415893236796062
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,6144,1,0,1.3178826967875164
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,8192,64,0,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,8192,32,0,0.1479680041472117
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,8192,4,0,0.605183998743693
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,10240,16,0,0.30634133021036786
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,8192,2,0,1.12827730178833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,10240,8,0,0.4978239933649699
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,10240,32,0,0.1899413267771403
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,10240,64,0,0.16913066307703653
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,8192,1,0,2.188261349995931
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,10240,4,0,0.8816640377044678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,10240,16,0,0.3063466747601827
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,10240,8,0,0.4920320113499959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,10240,2,0,1.6727040608723958
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,8192,1,0,2.184879938761393
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,10240,64,0,0.1684266726175944
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,10240,4,0,0.8826826413472494
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,10240,32,0,0.19114132722218832
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,12288,16,0,0.3962826728820801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,12288,8,0,0.6652586857477824
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,10240,2,0,1.677669366200765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,12288,32,0,0.25565866629282635
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,12288,64,0,0.21504533290863037
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,12288,4,0,1.2209493319193523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,10240,1,0,3.4048213958740234
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,12288,8,0,0.6686720053354899
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,12288,16,0,0.39765334129333496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,12288,2,0,2.325541337331136
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,12288,64,0,0.2153759996096293
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,12288,4,0,1.2168479760487874
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,12288,32,0,0.2553173303604126
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,10240,1,0,3.420186678568522
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,16384,16,0,0.6299306551615397
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,12288,2,0,2.3149174054463706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,16384,8,0,1.0985759894053142
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,16384,32,0,0.3993599812189738
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,2,16384,64,0,0.28757866223653156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,16384,4,0,2.0387840270996094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,12288,1,0,4.910421371459961
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,16384,8,0,1.0951680342356365
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,16384,16,0,0.6254986524581909
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,16384,4,0,2.03604793548584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,12288,1,0,4.855824152628581
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,16,1,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,16,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,2,16384,64,0,0.2872213323911031
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,16384,2,0,3.9975465138753257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,16384,32,0,0.39424534638722736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,16,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,16,8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,16,16,0,0.016389333953460056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,16,1,0,0.02013333390156428
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,16,64,0,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,16,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,16,2,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,16,8,0,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,16,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,16,16,0,0.02595199892918269
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,16,64,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,16,32,0,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,32,1,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,32,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,32,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,32,8,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,32,16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,32,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,32,1,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,32,8,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,32,32,0,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,32,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,32,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,16384,2,0,4.015621185302734
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,32,16,0,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,32,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,32,64,0,0.01509333277742068
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,64,2,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,64,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,64,1,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,64,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,64,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,64,16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,64,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,64,1,0,0.023557332654794056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,64,4,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,64,64,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,64,2,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,64,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,64,32,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,128,16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,128,2,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,128,8,0,0.017765333255132038
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,128,1,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,128,32,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,128,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,128,64,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,128,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,128,8,0,0.017045332739750545
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,128,1,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,128,4,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,128,2,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,128,16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,16384,1,0,8.505002975463867
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,128,64,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,256,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,256,16,0,0.02083733429511388
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,256,8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,256,4,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,256,2,0,0.025248001019159954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,256,32,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,256,1,0,0.03959999978542328
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,256,64,0,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,256,1,0,0.04027733455101649
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,256,2,0,0.0252960001428922
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,256,4,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,256,8,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,256,32,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,256,16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,512,64,0,0.023189333577950794
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,512,16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,512,2,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,512,8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,16384,1,0,8.548543930053711
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,512,4,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,512,1,0,0.08257066706816356
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,512,32,0,0.02458133300145467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,512,8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,512,64,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,512,4,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,512,16,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,512,2,0,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,512,1,0,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,1024,16,0,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,1024,8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,1024,4,0,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,1024,64,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,1024,32,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,1024,2,0,0.09967466195424397
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,1024,16,0,0.03312533348798752
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,1024,64,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,1024,1,0,0.17715734243392944
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,1024,8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,1024,4,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,1024,32,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,1024,2,0,0.10206933816274007
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,1536,32,0,0.03958933303753535
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,1536,8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,1536,16,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,1024,1,0,0.1788533329963684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,1536,64,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,1536,4,0,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,1536,2,0,0.1629866659641266
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,1536,32,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,1536,16,0,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,1536,8,0,0.0641653339068095
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,1536,64,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,1536,4,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,1536,2,0,0.16366933782895407
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,1536,1,0,0.2954240043958028
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,2048,16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,2048,8,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,2048,32,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,2048,4,0,0.1372160017490387
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,2048,64,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,1536,1,0,0.2967840035756429
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,2048,2,0,0.23654399315516153
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,2048,16,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,2048,8,0,0.0888426701227824
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,2048,4,0,0.13755733768145242
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,2048,64,0,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,2048,32,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,2048,2,0,0.23381867011388144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,2048,1,0,0.4346826473871867
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,3072,16,0,0.0942026674747467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,3072,32,0,0.0641653339068095
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,3072,8,0,0.1431893308957418
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,3072,64,0,0.06278933087984721
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,3072,4,0,0.23517866929372153
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,2048,1,0,0.43536531925201416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,3072,16,0,0.0942080020904541
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,3072,8,0,0.1442026694615682
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,3072,2,0,0.4254719813664754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,3072,64,0,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,3072,4,0,0.23653332392374674
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,3072,32,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,3072,2,0,0.42239999771118164
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,4096,16,0,0.1381439963976542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,4096,8,0,0.2126506765683492
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,4096,32,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,4096,64,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,4096,4,0,0.3609600067138672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,3072,1,0,0.8046933015187582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,4096,16,0,0.13927466670672098
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,3072,1,0,0.8070826530456543
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,4096,8,0,0.21299733718236288
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,4096,2,0,0.6765226523081461
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,4096,32,0,0.08839999636014302
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,4096,4,0,0.36026132106781006
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,4096,64,0,0.07645333309968312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,6144,16,0,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,4096,2,0,0.669701337814331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,6144,8,0,0.38518933455149335
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,6144,64,0,0.12424533565839131
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,6144,32,0,0.15223999818166098
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,6144,4,0,0.6973600387573242
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,4096,1,0,1.2876799901326497
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,6144,16,0,0.23312532901763916
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,6144,8,0,0.38519465923309326
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,4096,1,0,1.2924693425496419
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,6144,32,0,0.15359999736150107
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,6144,2,0,1.3421173095703125
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,6144,4,0,0.6983679930369059
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,6144,64,0,0.12324266632397969
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,8192,16,0,0.35276798407236737
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,8192,8,0,0.6116693417231241
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,6144,2,0,1.327445348103841
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,8192,32,0,0.2300586700439453
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,4,8192,64,0,0.16180800398190817
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,8192,4,0,1.1528533299763997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,6144,1,0,2.5850987434387207
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,8192,16,0,0.3537919918696086
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,8192,8,0,0.6195199886957804
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,8192,2,0,2.1831520398457847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,4,8192,64,0,0.1629866659641266
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,8192,32,0,0.23040000597635904
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,6144,1,0,2.6100053787231445
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,16,1,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,8192,4,0,1.140229304631551
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,16,2,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,16,4,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,16,16,0,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,16,8,0,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,16,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,16,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,16,1,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,16,2,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,16,8,0,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,16,4,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,16,32,0,0.01534933348496755
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,16,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,16,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,32,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,32,1,0,0.031082667410373688
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,32,2,0,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,32,16,0,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,32,8,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,32,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,32,64,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,32,1,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,32,2,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,32,8,0,0.014991999914248785
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,32,16,0,0.015365333606799444
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,8192,2,0,2.190341313680013
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,32,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,32,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,64,2,0,0.023887999355793
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,32,32,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,64,4,0,0.016970666746298473
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,64,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,64,1,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,64,8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,64,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,64,16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,64,2,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,64,4,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,64,1,0,0.03517866631348928
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,64,8,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,64,32,0,0.01498666654030482
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,64,64,0,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,128,2,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,128,4,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,128,16,0,0.017045332739750545
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,128,8,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,128,1,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,128,32,0,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,128,64,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,128,16,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,128,32,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,8192,1,0,4.343466758728027
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,128,8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,128,2,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,128,1,0,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,128,4,0,0.020143999407688778
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,128,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,256,64,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,256,4,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,256,8,0,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,256,16,0,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,256,32,0,0.021183999876181286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,256,2,0,0.03993066648642222
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,256,1,0,0.06861333549022675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,256,32,0,0.020794666061798733
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,256,8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,256,4,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,256,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,256,2,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,256,16,0,0.022175999979178112
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,256,1,0,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,512,16,0,0.027989332874615986
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,512,8,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,512,32,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,8192,1,0,4.669610659281413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,512,4,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,512,2,0,0.0825973351796468
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,512,64,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,512,32,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,512,16,0,0.027317332724730175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,512,4,0,0.051221330960591636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,512,8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,512,1,0,0.14645866552988687
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,512,2,0,0.0846453309059143
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,1024,64,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,1024,32,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,512,1,0,0.1474506656328837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,1024,16,0,0.039077334105968475
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,1024,8,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,1024,4,0,0.10342400272687276
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,1024,32,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,1024,16,0,0.03893866638342539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,1024,8,0,0.06690666576226552
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,1024,2,0,0.179365336894989
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,1024,4,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,1024,64,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,1536,16,0,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,1024,2,0,0.1786880095799764
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,1536,8,0,0.10239467024803162
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,1536,32,0,0.045050665736198425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,1024,1,0,0.3336533308029175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,1536,64,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,1536,4,0,0.16776533921559653
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,1024,1,0,0.3326293428738912
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,1536,16,0,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,1536,2,0,0.2984960079193115
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,1536,32,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,1536,8,0,0.10339732964833577
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,1536,64,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,1536,4,0,0.16659733653068542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,2048,16,0,0.0918239951133728
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,1536,2,0,0.3002026677131653
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,2048,8,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,2048,32,0,0.057018667459487915
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,1536,1,0,0.5611519813537598
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,2048,4,0,0.24132267634073892
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,2048,64,0,0.05049066742261251
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,2048,16,0,0.09149866302808125
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,2048,8,0,0.14217066764831543
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,2048,2,0,0.4384426673253377
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,1536,1,0,0.5679786602656046
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,2048,32,0,0.05700799822807312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,2048,64,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,2048,4,0,0.23961599667867026
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,3072,16,0,0.14916266997655234
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,2048,2,0,0.438101331392924
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,3072,8,0,0.24234666426976523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,3072,32,0,0.10171199838320415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,3072,64,0,0.07714666426181793
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,2048,1,0,0.8451733589172363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,3072,4,0,0.42922667662302655
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,3072,16,0,0.1493280033270518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,2048,1,0,0.8495840231577555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,3072,8,0,0.24166399240493774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,3072,32,0,0.10068800052007039
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,3072,64,0,0.07679466903209686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,3072,2,0,0.8241493701934814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,3072,4,0,0.42923200130462646
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,4096,16,0,0.2208426594734192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,3072,2,0,0.8111786842346191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,4096,8,0,0.37050668398539227
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,4096,32,0,0.14643200238545737
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,8,4096,64,0,0.10034132997194926
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,4096,4,0,0.6830080350240072
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,3072,1,0,1.5774772961934407
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,4096,16,0,0.2198293407758077
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,4096,8,0,0.3705173333485921
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,4096,2,0,1.2996266682942708
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,4096,4,0,0.6751573085784912
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,3072,1,0,1.5825920104980469
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,4096,32,0,0.14591999848683676
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,8,4096,64,0,0.10101333260536194
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,16,1,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,16,2,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,16,4,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,16,8,0,0.01637866720557213
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,16,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,16,16,0,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,16,1,0,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,16,16,64,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,16,2,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,16,4,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,16,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,16,8,0,0.016048000504573185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,16,16,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,16,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,32,1,0,0.04574933151404063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,32,2,0,0.031040000418821972
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,32,4,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,32,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,32,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,32,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,16,32,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,4096,2,0,1.2965546449025471
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,32,2,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,32,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,32,1,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,32,16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,32,4,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,16,32,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,32,32,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,64,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,64,4,0,0.023221333821614582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,64,2,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,64,1,0,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,64,32,0,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,64,8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,16,64,64,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,64,4,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,64,32,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,64,2,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,64,8,0,0.01703466723362605
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,64,1,0,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,16,64,64,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,64,16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,128,16,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,128,4,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,128,8,0,0.020138667275508244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,128,2,0,0.04155733436346054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,16,128,64,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,128,32,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,128,1,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,4096,1,0,2.5506134033203125
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,128,32,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,128,8,0,0.019786667078733444
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,128,4,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,128,2,0,0.041989331444104515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,16,128,64,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,128,16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,128,1,0,0.06963733335336049
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,256,32,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,256,16,0,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,16,256,64,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,256,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,256,4,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,256,2,0,0.07133333384990692
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,256,32,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,256,16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,256,8,0,0.024906667570273083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,256,1,0,0.12321600317955017
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,256,4,0,0.04162666698296865
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,16,256,64,0,0.022533332308133442
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,256,2,0,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,512,16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,512,32,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,512,8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,256,1,0,0.12356799840927124
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,16,512,64,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,4096,1,0,2.5793066024780273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,512,4,0,0.08499200145403545
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,512,32,0,0.027994667490323383
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,512,2,0,0.1479680041472117
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,512,16,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,512,8,0,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,16,512,64,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,512,4,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,512,2,0,0.14847999811172485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,1024,16,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,512,1,0,0.2728959918022156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,1024,8,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,1024,32,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,16,1024,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,1024,4,0,0.18533867597579956
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,512,1,0,0.2739199995994568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,1024,16,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,1024,32,0,0.04197333256403605
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,1024,8,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,16,1024,64,0,0.03687999894221624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,1024,2,0,0.34115731716156006
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,1024,4,0,0.1855413317680359
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,1536,16,0,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,1024,2,0,0.33774932225545246
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,1536,8,0,0.17577600479125977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,1536,32,0,0.07235733171304067
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,16,1536,64,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,1536,4,0,0.3046346704165141
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,1024,1,0,0.6418773333231608
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,1536,16,0,0.10920533537864685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,1024,1,0,0.646997332572937
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,1536,8,0,0.1752799948056539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,1536,2,0,0.5676373243331909
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,1536,32,0,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,16,1536,64,0,0.052245333790779114
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,1536,4,0,0.30326932668685913
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,2048,16,0,0.15018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,2048,8,0,0.24883200724919638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,2048,32,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,1536,2,0,0.5662879943847656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,16,2048,64,0,0.068271999557813
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,2048,4,0,0.44731732209523517
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,1536,1,0,1.104042689005534
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,2048,8,0,0.24985599517822266
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,2048,16,0,0.15038933356602988
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,2048,2,0,0.8461653391520182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,1536,1,0,1.1122453212738037
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,2048,4,0,0.4479999939600627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,16,1,0,0.07373333474000295
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,16,2048,64,0,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,2048,32,0,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,16,8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,16,4,0,0.030042665700117748
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,16,2,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,32,16,64,0,0.015002666662136713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,16,16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,16,32,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,16,4,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,16,2,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,16,1,0,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,16,8,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,16,16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,16,32,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,32,16,64,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,32,8,0,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,32,4,0,0.031040000418821972
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,32,2,0,0.046426668763160706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,32,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,32,1,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,32,16,0,0.01709866647919019
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,32,32,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,2048,2,0,0.8485653400421143
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,32,2,0,0.04675200084845225
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,32,8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,32,4,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,32,1,0,0.08121599753697713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,32,16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,32,32,0,0.016762666404247284
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,32,32,64,0,0.014991999914248785
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,64,32,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,64,8,0,0.023189333577950794
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,64,16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,64,4,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,32,64,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,64,2,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,64,1,0,0.09318400422732036
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,64,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,64,4,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,64,16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,64,2,0,0.056330665946006775
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,32,64,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,64,1,0,0.0942080020904541
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,128,8,0,0.0286613330245018
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,128,32,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,128,16,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,128,4,0,0.04659200211366018
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,2048,1,0,1.6610986391703289
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,128,2,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,32,128,64,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,128,32,0,0.018794666975736618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,128,16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,128,8,0,0.028330666323502857
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,128,4,0,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,128,1,0,0.12116799751917522
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,32,128,64,0,0.018085333208243053
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,128,2,0,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,256,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,256,16,0,0.02699200063943863
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,128,1,0,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,256,8,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,256,4,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,32,256,64,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,256,2,0,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,256,16,0,0.027642667293548584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,256,8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,2048,1,0,1.6501706441243489
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,256,32,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,256,4,0,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,32,256,64,0,0.022837333381175995
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,256,2,0,0.1262933313846588
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,512,16,0,0.05665599803129832
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,256,1,0,0.23108800252278647
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,512,8,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,512,32,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,32,512,64,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,256,1,0,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,512,4,0,0.1520639955997467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,512,32,0,0.036176001032193504
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,512,16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,512,8,0,0.09147733449935913
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,32,512,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,512,2,0,0.2797226707140605
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,512,4,0,0.15359999736150107
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,1024,16,0,0.1160533328851064
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,512,2,0,0.27802133560180664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,1024,8,0,0.19490132729212442
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,32,1024,64,0,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,1024,32,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,512,1,0,0.5287253459294637
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,1024,4,0,0.34867199261983234
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,512,1,0,0.5307679971059164
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,1024,16,0,0.1181013286113739
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,1024,8,0,0.19421867529551187
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,32,1024,64,0,0.053957333167394005
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,1024,32,0,0.07780799766381581
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,16,4,0,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,16,2,0,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,16,1,0,0.13414399822553
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,1024,4,0,0.3449173370997111
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,16,16,0,0.02993600070476532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,16,8,0,0.03072533259789149
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,1024,2,0,0.6621813376744589
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,16,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,64,16,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,16,4,0,0.04506133496761322
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,16,2,0,0.07339199880758922
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,16,8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,16,1,0,0.13448533415794373
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,16,16,0,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,16,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,64,16,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,32,2,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,32,16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,32,4,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,32,8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,32,32,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,32,1,0,0.1443839967250824
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,64,32,64,0,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,32,4,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,32,16,0,0.021840001145998638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,1024,2,0,0.6550133228302002
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,32,8,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,32,2,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,32,32,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,64,32,64,0,0.017477333545684814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,32,1,0,0.14353600144386292
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,64,16,0,0.02422933280467987
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,64,8,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,64,4,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,64,2,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,64,64,64,0,0.017743999759356182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,64,32,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,64,8,0,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,64,1,0,0.16964266697565714
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,64,16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,64,2,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,64,4,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,64,32,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,64,64,64,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,64,1,0,0.16964266697565714
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,1024,1,0,1.2665226459503174
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,128,16,0,0.030042665700117748
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,128,8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,64,128,64,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,128,32,0,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,128,4,0,0.07165866593519847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,128,2,0,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,128,8,0,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,128,32,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,128,4,0,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,128,16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,64,128,64,0,0.01907733331123988
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,128,2,0,0.12219732999801636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,1024,1,0,1.2651519775390625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,128,1,0,0.22425599892934164
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,256,16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,256,8,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,256,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,128,1,0,0.2228906750679016
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,64,256,64,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,256,4,0,0.13056000073750815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,256,8,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,256,16,0,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,256,32,0,0.029690665503342945
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,64,256,64,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,256,2,0,0.23483733336130777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,256,4,0,0.13243200381596884
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,256,2,0,0.237226665019989
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,512,16,0,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,512,8,0,0.1634773313999176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,512,32,0,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,64,512,64,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,256,1,0,0.4493653376897176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,512,4,0,0.2892853418986003
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,256,1,0,0.4456106821695964
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,512,16,0,0.10001066327095032
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,512,8,0,0.16366933782895407
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,512,32,0,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,64,512,64,0,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,512,4,0,0.288592000802358
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,128,16,2,0,0.134661336739858
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,128,16,1,0,0.24883200724919638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,512,2,0,0.5386293331782023
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,128,16,4,0,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,128,16,8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,128,16,16,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,128,16,32,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,128,16,64,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,128,16,4,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,128,16,2,0,0.13449066877365112
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,128,16,8,0,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,128,16,16,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,128,16,1,0,0.24712532758712769
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,128,16,64,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,128,16,32,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,128,32,16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,128,32,8,0,0.04780800143877665
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,128,32,4,0,0.08293866614500682
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,128,32,32,0,0.023546665906906128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,128,32,64,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,512,2,0,0.5403306484222412
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,128,32,2,0,0.14455466469128928
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,128,32,1,0,0.2691253423690796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,128,32,4,0,0.08330133557319641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,128,32,16,0,0.03307733436425527
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,128,32,8,0,0.04914666712284088
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,128,32,32,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,128,32,2,0,0.14472533265749613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,128,32,64,0,0.020762667059898376
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,128,32,1,0,0.26709334055582684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,128,64,16,0,0.03791466603676478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,128,64,32,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,128,64,8,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,128,64,64,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,128,64,4,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,128,64,2,0,0.17202667395273843
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,128,64,4,0,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,128,64,8,0,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,128,64,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,128,64,32,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,128,64,16,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,512,1,0,1.044986645380656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,128,64,2,0,0.1711626648902893
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,128,64,1,0,0.3264853358268738
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,128,128,16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,128,128,32,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,128,128,8,0,0.07680533329645793
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,512,1,0,1.0446560382843018
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,128,64,1,0,0.32477867603302
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,128,128,64,0,0.02489600082238515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,128,128,4,0,0.12662933270136514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,128,128,16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,128,128,32,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,128,128,8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,128,128,64,0,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,128,128,2,0,0.22801067431767783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,128,128,4,0,0.1262933313846588
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,128,256,16,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,128,128,2,0,0.22766933838526407
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,128,256,8,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,128,256,32,0,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,128,256,64,0,0.044026667873064675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,128,128,1,0,0.43398932615915936
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,128,256,4,0,0.2457759976387024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,128,128,1,0,0.43298133214314777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,128,256,16,0,0.08772800366083781
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,128,256,8,0,0.1423360009988149
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,128,256,32,0,0.059749335050582886
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,128,256,64,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,128,256,4,0,0.24748265743255615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,128,256,2,0,0.45892266432444256
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,256,16,4,0,0.13705066839853922
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,256,16,16,0,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,256,16,2,0,0.2481493353843689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,256,16,8,0,0.07646400233109792
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,256,16,32,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,256,16,64,0,0.022848000129063923
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,256,16,1,0,0.4814506769180298
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,256,16,16,0,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,256,16,4,0,0.13755200306574503
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,256,16,8,0,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,256,16,2,0,0.24951465924580893
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,256,16,32,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,256,16,64,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,128,256,2,0,0.4561920166015625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,256,16,1,0,0.47564268112182617
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,256,32,32,0,0.03448000053564707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,256,32,16,0,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,256,32,8,0,0.08431466420491536
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,256,32,4,0,0.14574933052062988
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,256,32,64,0,0.035487999518712364
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,256,32,2,0,0.2701653242111206
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,256,32,8,0,0.08602666854858398
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,256,32,16,0,0.049839998284975685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,256,32,4,0,0.14658666650454202
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,256,32,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,256,32,32,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,256,32,2,0,0.2691413362820943
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,256,32,1,0,0.5232479969660441
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,128,256,1,0,0.8717599709828695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,256,64,32,0,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,256,64,16,0,0.0631520003080368
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,256,32,1,0,0.5229173501332601
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,256,64,8,0,0.10308800141016643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,256,64,64,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,256,64,4,0,0.1802240014076233
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,256,64,16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,128,256,1,0,0.8796106974283854
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,256,64,8,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,256,64,64,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,256,64,4,0,0.17748800913492838
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,256,64,2,0,0.3302239974339803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,256,64,32,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,256,64,2,0,0.330949326356252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,256,128,16,0,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,256,128,32,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,1,256,128,64,0,0.047093331813812256
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,256,128,8,0,0.13893333077430725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,256,64,1,0,0.6411946614583334
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,256,128,4,0,0.23859200874964395
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,256,64,1,0,0.6370986700057983
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,256,128,8,0,0.13857600092887878
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,256,128,32,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,256,128,16,0,0.08669333656628926
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,16,1,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,16,2,0,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,1,256,128,64,0,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,16,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,256,128,4,0,0.23855467637379965
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,16,8,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,16,16,0,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,16,32,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,256,128,2,0,0.44014934698740643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,16,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,16,1,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,16,2,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,16,8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,16,4,0,0.0249439999461174
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,16,32,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,16,16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,32,1,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,16,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,32,2,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,32,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,32,4,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,32,8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,32,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,32,1,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,32,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,32,16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,32,2,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,32,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,32,8,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,32,32,0,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,32,64,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,64,16,0,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,64,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,256,128,2,0,0.44049068291982013
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,64,4,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,64,1,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,64,8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,64,32,0,0.01504533365368843
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,64,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,64,16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,64,2,0,0.01532799998919169
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,64,4,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,64,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,64,1,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,64,8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,64,64,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,128,1,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,128,32,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,128,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,128,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,128,4,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,128,16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,128,1,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,128,64,0,0.016629333297411602
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,128,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,128,4,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,128,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,128,8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,128,16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,128,64,0,0.023845332364241283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,256,2,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,256,4,0,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,256,1,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,256,8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,256,16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,256,128,1,0,0.8495466709136963
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,256,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,256,2,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,256,4,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,256,8,0,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,256,16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,256,32,0,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,256,1,0,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,256,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,512,16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,512,32,0,0.0249493345618248
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,512,2,0,0.030378667016824085
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,512,8,0,0.025242666403452556
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,512,1,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,512,4,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,512,4,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,512,1,0,0.04880533119042715
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,512,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,512,16,0,0.0249439999461174
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,512,2,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,512,64,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,256,128,1,0,0.851967970530192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,1024,2,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,1024,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,1024,8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,1024,16,0,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,1024,64,0,0.029370665550231934
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,1024,4,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,1024,1,0,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,1024,2,0,0.06280000011126201
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,1024,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,1024,64,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,1024,1,0,0.09967999656995137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,1024,8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,1024,16,0,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,1024,4,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,1536,8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,1536,16,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,1536,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,1536,4,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,1536,2,0,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,1536,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,1536,1,0,0.16127999623616537
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,1536,32,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,1536,16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,1536,8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,1536,4,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,1536,2,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,1536,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,1536,1,0,0.16127999623616537
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,2048,16,0,0.046069333950678505
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,2048,8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,2048,32,0,0.04507199923197428
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,2048,4,0,0.08499200145403545
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,2048,64,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,2048,2,0,0.13501333196957907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,2048,16,0,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,2048,8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,2048,4,0,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,2048,64,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,2048,1,0,0.23176532983779907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,2048,32,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,2048,2,0,0.135343998670578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,2048,1,0,0.233130673567454
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,3072,16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,3072,8,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,3072,32,0,0.060746664802233376
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,3072,64,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,3072,4,0,0.14061866203943887
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,3072,2,0,0.2307466665903727
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,3072,32,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,3072,16,0,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,3072,64,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,3072,8,0,0.08839466174443562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,3072,4,0,0.14062399665514627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,3072,2,0,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,3072,1,0,0.42206398646036786
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,4096,16,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,4096,32,0,0.07404266794522603
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,4096,8,0,0.13448533415794373
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,4096,64,0,0.07406400144100189
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,4096,4,0,0.20753600200017294
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,3072,1,0,0.4179679950078328
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,4096,2,0,0.3548159996668498
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,4096,16,0,0.07817066709200542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,4096,8,0,0.13550933202107748
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,4096,64,0,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,4096,32,0,0.07407466570536296
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,4096,4,0,0.2065066695213318
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,4096,2,0,0.3537919918696086
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,6144,16,0,0.1416373352209727
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,6144,8,0,0.22868800163269043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,4096,1,0,0.6628373463948568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,6144,32,0,0.10306666294733684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,6144,64,0,0.09898133079210918
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,6144,4,0,0.3752959966659546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,4096,1,0,0.6642346779505411
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,6144,16,0,0.14421866337458292
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,6144,8,0,0.22766399383544922
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,6144,32,0,0.10274666547775269
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,6144,2,0,0.6959786415100098
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,6144,64,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,6144,4,0,0.3763253291447957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,8192,16,0,0.22291199366251627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,8192,8,0,0.34355199337005615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,6144,2,0,0.6888000170389811
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,8192,32,0,0.1353600025177002
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,8192,64,0,0.12730133533477783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,8192,4,0,0.6062080065409342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,6144,1,0,1.3253973325093586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,8192,16,0,0.2208426594734192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,8192,2,0,1.1275946299235027
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,8192,8,0,0.3466293414433797
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,6144,1,0,1.3141333262125652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,8192,4,0,0.5993813276290894
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,8192,32,0,0.1358506679534912
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,8192,64,0,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,10240,16,0,0.3036106626192729
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,8192,2,0,1.1221386591593425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,10240,8,0,0.490666667620341
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,10240,32,0,0.1800373395284017
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,10240,64,0,0.15461333592732748
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,10240,4,0,0.8779093424479166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,8192,1,0,2.1855573654174805
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,10240,16,0,0.303274671236674
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,10240,8,0,0.4889599879582723
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,10240,64,0,0.15616533160209656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,10240,2,0,1.6930346488952637
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,8192,1,0,2.215936024983724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,10240,32,0,0.18141865730285645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,10240,4,0,0.8782506783803304
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,12288,16,0,0.39321601390838623
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,10240,2,0,1.6720213890075684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,12288,32,0,0.25019200642903644
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,12288,8,0,0.6621866623560587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,12288,64,0,0.1865440011024475
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,12288,4,0,1.211733341217041
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,10240,1,0,3.36080010732015
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,12288,8,0,0.6615093151728312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,12288,16,0,0.3945600191752116
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,12288,4,0,1.2151467005411785
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,12288,2,0,2.3152640660603843
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,10240,1,0,3.284325281778971
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,12288,32,0,0.24951465924580893
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,12288,64,0,0.18721065918604532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,1,16384,16,0,0.6232800086339315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,12288,2,0,2.3268747329711914
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,1,16384,8,0,1.0886826515197754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,1,16384,32,0,0.3901386658350627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,1,16384,64,0,0.2691359917322795
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,1,16384,4,0,2.062335968017578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,12288,1,0,4.512607892354329
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,1,16384,16,0,0.6161066691080729
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,1,16384,8,0,1.099946657816569
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,12288,1,0,4.870175997416179
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,1,16384,64,0,0.26709334055582684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,1,16384,32,0,0.3855466842651367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,1,16384,4,0,2.046293258666992
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,1,16384,2,0,4.003871917724609
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,16,1,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,16,2,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,16,4,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,16,8,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,16,32,0,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,16,16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,16,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,16,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,16,2,0,0.016421332955360413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,16,1,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,16,32,0,0.015050667027632395
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,16,8,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,16,16,0,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,16,64,0,0.01470400020480156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,32,2,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,32,1,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,32,8,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,32,4,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,32,16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,32,32,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,32,64,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,32,1,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,32,8,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,32,2,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,32,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,32,16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,1,16384,2,0,4.232565244038899
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,32,64,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,32,32,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,64,4,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,64,1,0,0.0235359991590182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,64,2,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,64,16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,64,8,0,0.015354666858911514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,64,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,64,2,0,0.01741333305835724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,64,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,64,1,0,0.023226665953795116
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,64,32,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,64,8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,64,16,0,0.01676799977819125
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,64,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,128,1,0,0.026954665780067444
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,128,4,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,128,16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,128,8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,128,2,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,128,32,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,128,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,128,32,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,128,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,128,8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,128,2,0,0.018794666975736618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,128,4,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,128,1,0,0.025936000049114227
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,128,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,256,16,0,0.021141332884629566
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,256,8,0,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,1,16384,1,0,8.772778828938803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,256,4,0,0.020842666427294414
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,256,2,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,256,1,0,0.03957866628964742
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,256,64,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,256,4,0,0.022895999252796173
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,256,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,256,1,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,256,16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,256,8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,256,32,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,256,2,0,0.0249439999461174
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,512,32,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,512,64,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,512,16,0,0.024890666206677754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,512,4,0,0.030053332448005676
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,512,8,0,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,512,2,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,512,1,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,512,16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,512,8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,512,4,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,512,2,0,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,1,16384,1,0,8.280239741007486
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,512,1,0,0.08262399832407634
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,1024,8,0,0.03518400092919668
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,1024,32,0,0.0317493329445521
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,1024,64,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,1024,16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,1024,4,0,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,1024,2,0,0.10033067067464192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,1024,32,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,1024,16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,1024,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,1024,8,0,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,1024,4,0,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,1024,1,0,0.17595734198888144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,1024,2,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,1536,64,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,1024,1,0,0.1764693260192871
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,1536,32,0,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,1536,16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,1536,8,0,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,1536,4,0,0.09665600458780925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,1536,32,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,1536,2,0,0.16145066420237222
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,1536,16,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,1536,8,0,0.061759998401006065
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,1536,64,0,0.03922666609287262
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,1536,4,0,0.09827733039855957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,1536,2,0,0.16316266854604086
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,2048,16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,1536,1,0,0.2930346727371216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,2048,8,0,0.08602133393287659
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,2048,32,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,2048,4,0,0.1365333298842112
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,2048,64,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,1536,1,0,0.2930293281873067
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,2048,2,0,0.23381332556406656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,2048,16,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,2048,8,0,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,2048,32,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,2048,4,0,0.13636266191800436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,2048,64,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,2048,2,0,0.23347733418146768
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,3072,16,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,3072,32,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,2048,1,0,0.43435200055440265
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,3072,8,0,0.14062399665514627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,3072,64,0,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,3072,4,0,0.23278399308522543
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,2048,1,0,0.43060799439748126
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,3072,16,0,0.08944533268610637
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,3072,8,0,0.1418239971001943
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,3072,64,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,3072,2,0,0.4241066773732503
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,3072,32,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,3072,4,0,0.23210134108861288
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,4096,16,0,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,3072,2,0,0.420693318049113
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,4096,8,0,0.2106026609738668
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,4096,32,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,3072,1,0,0.7975200017293295
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,4096,64,0,0.07578133543332417
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,4096,4,0,0.3592533270517985
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,4096,16,0,0.135343998670578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,4096,8,0,0.20855466524759927
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,3072,1,0,0.8026399612426758
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,4096,2,0,0.6628693342208862
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,4096,32,0,0.08055999875068665
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,4096,4,0,0.35583468278249103
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,4096,64,0,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,6144,16,0,0.22832000255584717
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,6144,8,0,0.3780213197072347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,4096,2,0,0.6683306694030762
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,6144,32,0,0.1462559998035431
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,6144,64,0,0.10514133175214131
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,6144,4,0,0.6983413696289062
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,4096,1,0,1.287333329518636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,6144,16,0,0.2290346622467041
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,6144,8,0,0.38178133964538574
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,4096,1,0,1.2842666308085124
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,6144,32,0,0.1495039959748586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,6144,2,0,1.3154933452606201
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,6144,4,0,0.6884640057881674
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,6144,64,0,0.1071519951025645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,8192,16,0,0.3476479848225911
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,8192,8,0,0.6031359831492106
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,6144,2,0,1.327445348103841
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,8192,32,0,0.22358399629592896
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,8192,64,0,0.1474613348642985
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,8192,4,0,1.12827730178833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,6144,1,0,2.5847466786702475
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,8192,16,0,0.34799468517303467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,8192,8,0,0.6058933337529501
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,8192,4,0,1.126197338104248
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,8192,2,0,2.1855626106262207
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,6144,1,0,2.5864532788594565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,8192,32,0,0.22426132361094156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,8192,64,0,0.14813333749771118
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,10240,16,0,0.4916906754175822
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,8192,2,0,2.2152533531188965
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,10240,32,0,0.30702932675679523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,10240,8,0,0.8857866923014323
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,10240,64,0,0.19063466787338257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,10240,4,0,1.6766293843587239
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,8192,1,0,4.4754133224487305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,10240,8,0,0.8867839972178141
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,10240,16,0,0.493397315343221
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,10240,4,0,1.6987999280293782
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,10240,32,0,0.30671467383702594
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,10240,64,0,0.19166400035222372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,10240,2,0,3.249333381652832
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,8192,1,0,4.33134396870931
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,12288,16,0,0.6714026927947998
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,10240,2,0,3.390501340230306
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,12288,8,0,1.2195573647816975
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,12288,32,0,0.39798935254414874
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,12288,64,0,0.25460267066955566
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,12288,4,0,2.3272159894307456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,10240,1,0,7.001775741577148
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,12288,8,0,1.218565305074056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,12288,4,0,2.3313066164652505
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,12288,32,0,0.3959466616312663
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,12288,16,0,0.6659359931945801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,12288,64,0,0.2549706697463989
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,12288,2,0,4.555599848429362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,10240,1,0,7.003477096557617
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,2,16384,16,0,1.0941440264383953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,12288,2,0,4.929381370544434
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,2,16384,8,0,2.0507307052612305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,2,16384,32,0,0.6251519918441772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,2,16384,64,0,0.3959466616312663
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,2,16384,4,0,4.091925303141276
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,12288,1,0,10.086586634318033
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,2,16384,8,0,2.0558506647745767
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,2,16384,16,0,1.1129120190938313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,2,16384,4,0,4.001296043395996
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,2,16384,32,0,0.6271786689758301
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,16,2,0,0.01979200045267741
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,16,1,0,0.02903466671705246
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,2,16384,64,0,0.39662933349609375
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,16,4,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,16,8,0,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,12288,1,0,10.06814956665039
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,16,32,0,0.015722667177518208
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,16,64,0,0.018885333091020584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,2,16384,2,0,8.491674423217773
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,16,16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,16,2,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,16,1,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,16,4,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,16,16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,16,8,0,0.015029333531856537
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,16,32,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,16,64,0,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,32,4,0,0.016885332763195038
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,32,2,0,0.021173333128293354
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,32,1,0,0.03038399914900462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,32,8,0,0.014720000326633453
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,32,16,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,32,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,32,64,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,32,2,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,32,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,32,8,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,32,1,0,0.030378667016824085
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,32,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,32,32,0,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,32,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,64,2,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,64,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,64,1,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,64,4,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,64,16,0,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,64,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,64,2,0,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,64,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,64,1,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,64,8,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,64,16,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,64,64,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,128,2,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,128,4,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,2,16384,2,0,8.471413294474283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,128,1,0,0.03993066648642222
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,128,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,128,8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,128,32,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,128,64,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,128,8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,128,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,128,4,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,128,16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,128,1,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,128,2,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,128,64,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,256,16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,256,8,0,0.022848000129063923
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,256,4,0,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,256,2,0,0.04027733455101649
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,256,64,0,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,256,1,0,0.0699786643187205
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,256,4,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,256,16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,256,8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,256,2,0,0.03994133323431015
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,256,64,0,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,256,1,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,512,8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,512,16,0,0.02526933451493581
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,512,4,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,512,2,0,0.08294400076071422
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,512,8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,512,16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,512,4,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,512,1,0,0.1456106702486674
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,512,32,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,512,64,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,512,2,0,0.08329066634178162
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,512,1,0,0.14643200238545737
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,1024,16,0,0.038906666139761605
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,1024,8,0,0.06246933341026306
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,1024,32,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,1024,4,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,1024,64,0,0.03312533348798752
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,1024,2,0,0.17869333426157633
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,1024,8,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,1024,16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,1024,4,0,0.1013759970664978
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,1024,64,0,0.03176533430814743
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,1024,32,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,1024,2,0,0.1785279909769694
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,1024,1,0,0.3309226632118225
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,1536,8,0,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,1536,16,0,0.06380799909432729
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,1536,32,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,2,16384,1,0,16.961023966471355
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,1536,64,0,0.041322665909926094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,1024,1,0,0.33126399914423627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,1536,4,0,0.16402666767438254
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,1536,2,0,0.29542932907740277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,1536,16,0,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,1536,8,0,0.09898133079210918
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,1536,32,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,1536,64,0,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,1536,4,0,0.1634986698627472
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,1536,2,0,0.2964319984118144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,2048,8,0,0.1367039978504181
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,1536,1,0,0.5560373465220133
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,2,16384,1,0,17.032015482584637
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,2048,64,0,0.04784533381462097
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,2048,4,0,0.237226665019989
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,2048,16,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,2048,32,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,2048,2,0,0.4346826473871867
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,1536,1,0,0.5573920011520386
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,2048,16,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,2048,8,0,0.1370560030142466
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,2048,64,0,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,2048,32,0,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,2048,4,0,0.23656533161799112
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,3072,16,0,0.1430186629295349
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,3072,8,0,0.23518933852513632
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,2048,2,0,0.4346880118052165
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,3072,32,0,0.09284800291061401
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,3072,64,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,2048,1,0,0.8427573045094808
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,3072,4,0,0.4227413336435954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,3072,16,0,0.1443893313407898
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,3072,8,0,0.2362133264541626
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,3072,32,0,0.0942133367061615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,3072,64,0,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,3072,2,0,0.8063999811808268
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,2048,1,0,0.8386507034301758
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,3072,4,0,0.420693318049113
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,4096,16,0,0.21196266015370688
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,4096,8,0,0.36061867078145343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,4096,32,0,0.14011200269063315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,3072,2,0,0.8162986437479655
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,4096,64,0,0.08737599849700928
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,4096,4,0,0.6690133412679037
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,3072,1,0,1.5764479637145996
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,4096,8,0,0.36061867078145343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,4096,16,0,0.21230934063593546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,4096,4,0,0.6696906884511312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,4096,32,0,0.13995200395584106
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,4096,2,0,1.3013333479563396
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,3072,1,0,1.5694506963094075
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,4096,64,0,0.08738133311271667
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,6144,16,0,0.3834773302078247
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,4096,2,0,1.308847983678182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,6144,8,0,0.6939573287963867
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,6144,64,0,0.15291733543078104
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,6144,32,0,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,6144,4,0,1.3356374104817708
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,4096,1,0,2.541770617167155
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,6144,16,0,0.3845333258310954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,6144,8,0,0.7017866770426432
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,6144,32,0,0.23243733247121176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,6144,4,0,1.3253973325093586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,6144,64,0,0.1532586713631948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,4096,1,0,2.5267093976338706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,6144,2,0,2.580986658732096
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,4,8192,16,0,0.6133706569671631
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,4,8192,8,0,1.1340800126393635
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,6144,2,0,2.622122605641683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,4,8192,32,0,0.35516266028086346
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,4,8192,64,0,0.2307413419087728
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,4,8192,4,0,2.2121547063191733
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,6144,1,0,5.156026522318522
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,4,8192,8,0,1.1473920345306396
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,4,8192,4,0,2.1828266779581704
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,4,8192,32,0,0.3551573355992635
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,4,8192,16,0,0.61407999197642
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,16,2,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,16,1,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,4,8192,64,0,0.2310826579729716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,16,4,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,16,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,6144,1,0,5.355712254842122
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,4,8192,2,0,4.545050621032715
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,16,16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,16,32,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,16,64,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,16,4,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,16,8,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,16,1,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,16,16,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,16,2,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,16,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,16,64,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,32,1,0,0.046426668763160706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,32,2,0,0.030373332401116688
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,32,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,32,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,32,32,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,32,4,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,32,64,0,0.015354666858911514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,32,4,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,32,2,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,32,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,32,1,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,32,32,0,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,32,64,0,0.016688000410795212
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,32,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,64,2,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,64,8,0,0.018415999909241993
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,64,4,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,64,1,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,64,32,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,64,64,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,64,16,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,64,2,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,64,8,0,0.01775466650724411
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,64,4,0,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,64,1,0,0.05563200016816457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,64,32,0,0.015002666662136713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,64,64,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,4,8192,2,0,4.65118408203125
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,64,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,128,8,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,128,4,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,128,16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,128,2,0,0.042319998145103455
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,128,1,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,128,32,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,128,64,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,128,4,0,0.026629333694775898
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,128,2,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,128,32,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,128,8,0,0.01945066700379054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,128,16,0,0.018746666610240936
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,128,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,128,1,0,0.06860266625881195
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,256,16,0,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,256,4,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,256,64,0,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,256,8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,256,2,0,0.06995200117429097
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,256,16,0,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,256,1,0,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,256,4,0,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,256,2,0,0.07030400137106578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,256,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,256,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,256,1,0,0.12153066198031108
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,512,16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,512,32,0,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,512,8,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,512,4,0,0.08292266726493835
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,512,2,0,0.14591466387112936
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,512,8,0,0.05051200091838837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,512,16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,512,32,0,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,512,64,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,512,4,0,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,512,1,0,0.27051732937494916
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,512,2,0,0.1467680037021637
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,4,8192,1,0,9.4923095703125
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,1024,16,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,512,1,0,0.2711893320083618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,1024,64,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,1024,8,0,0.10307733217875163
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,1024,32,0,0.03755199909210205
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,1024,4,0,0.17919999361038208
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,1024,16,0,0.0659093310435613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,1024,8,0,0.10478400190671285
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,1024,2,0,0.33398934205373126
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,1024,64,0,0.03346133232116699
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,1024,32,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,1024,4,0,0.1795413295427958
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,1024,2,0,0.3343413273493449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,1536,8,0,0.16690667470296225
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,1536,32,0,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,4,8192,1,0,9.46397844950358
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,1536,16,0,0.10204266508420308
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,1024,1,0,0.6381226778030396
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,1536,64,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,1536,4,0,0.29747732480367023
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,1536,16,0,0.10171199838320415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,1024,1,0,0.6405066649119059
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,1536,8,0,0.16793600718180338
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,1536,2,0,0.5638879934946696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,1536,64,0,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,1536,32,0,0.06619200110435486
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,1536,4,0,0.29712533950805664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,2048,16,0,0.1431893308957418
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,2048,32,0,0.0935040016969045
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,2048,8,0,0.24096532662709555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,1536,2,0,0.5611573457717896
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,2048,64,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,2048,4,0,0.441866676012675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,1536,1,0,1.0934560298919678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,2048,16,0,0.1418239971001943
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,2048,8,0,0.24030399322509766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,2048,2,0,0.8447999954223633
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,2048,64,0,0.057674666245778404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,2048,4,0,0.43809600671132404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,2048,32,0,0.09078933795293172
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,1536,1,0,1.1019999980926514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,3072,16,0,0.24268800020217896
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,2048,2,0,0.842415968577067
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,3072,8,0,0.42922667662302655
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,3072,32,0,0.1493280033270518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,3072,64,0,0.10068800052007039
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,3072,4,0,0.8087999820709229
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,2048,1,0,1.645055929819743
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,3072,16,0,0.2430186669031779
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,3072,8,0,0.42785600821177167
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,3072,32,0,0.1493333379427592
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,3072,64,0,0.10035733381907146
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,3072,4,0,0.8111786842346191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,2048,1,0,1.6556320190429688
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,3072,2,0,1.5941920280456543
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,8,4096,16,0,0.36982933680216473
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,8,4096,8,0,0.6785600185394287
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,8,4096,32,0,0.21914132436116537
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,3072,2,0,1.6081867218017578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,8,4096,64,0,0.14727999766667685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,8,4096,4,0,1.2989386717478435
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,3072,1,0,3.1332693099975586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,8,4096,8,0,0.6775519847869873
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,8,4096,16,0,0.36813334623972577
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,8,4096,32,0,0.2214933236440023
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,8,4096,4,0,1.299621343612671
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,16,2,0,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,16,16,1,0,0.07339199880758922
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,8,4096,2,0,2.533888022104899
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,8,4096,64,0,0.14660267035166422
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,16,4,0,0.029343999922275543
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,16,8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,16,32,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,16,16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,3072,1,0,3.138042767842611
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,16,64,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,16,4,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,16,2,0,0.043696001172065735
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,16,8,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,16,16,1,0,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,16,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,16,32,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,16,64,0,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,32,8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,32,4,0,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,32,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,32,2,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,16,32,1,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,32,32,0,0.015706667055686314
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,32,64,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,32,8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,32,2,0,0.04775999983151754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,16,32,1,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,32,4,0,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,32,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,32,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,32,64,0,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,64,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,64,16,0,0.017029333859682083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,64,4,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,64,2,0,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,16,64,1,0,0.09693333506584167
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,64,64,0,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,64,4,0,0.033786666889985405
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,64,8,0,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,64,2,0,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,64,16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,16,64,1,0,0.09660266836484273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,8,4096,2,0,2.5820107460021973
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,64,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,64,32,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,128,16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,128,32,0,0.018746666610240936
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,128,8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,128,4,0,0.04264533519744873
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,128,64,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,128,2,0,0.06995200117429097
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,128,16,0,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,128,8,0,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,16,128,1,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,128,4,0,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,128,32,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,128,2,0,0.06929600238800049
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,128,64,0,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,256,16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,16,128,1,0,0.11947733163833618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,256,8,0,0.04197866717974345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,256,32,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,256,64,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,256,4,0,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,256,2,0,0.12288000186284383
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,256,8,0,0.04164800047874451
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,256,16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,256,4,0,0.07133866846561432
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,256,32,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,256,64,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,256,2,0,0.12219732999801636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,16,256,1,0,0.2259626587231954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,512,16,0,0.0529013325770696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,512,8,0,0.08566932876904805
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,16,256,1,0,0.23040533065795898
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,8,4096,1,0,5.168314615885417
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,512,32,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,512,64,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,512,4,0,0.1474560002485911
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,512,16,0,0.05256533126036326
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,512,8,0,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,512,2,0,0.27289066712061566
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,512,64,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,512,32,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,512,4,0,0.14847466349601746
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,512,2,0,0.27426133553187054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,1024,8,0,0.18516800800959268
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,16,512,1,0,0.5266826550165812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,1024,16,0,0.10956799983978271
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,1024,32,0,0.06929600238800049
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,1024,64,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,1024,4,0,0.33638401826222736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,8,4096,1,0,5.268325487772624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,16,512,1,0,0.5294026533762614
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,1024,16,0,0.10786666472752889
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,1024,32,0,0.07030933101971944
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,1024,64,0,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,1024,8,0,0.18500266472498575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,1024,2,0,0.6463199853897095
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,1024,4,0,0.3380906581878662
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,1536,16,0,0.17561066150665283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,1536,8,0,0.30565865834554035
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,1536,32,0,0.1085599958896637
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,1024,2,0,0.6435840129852295
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,1536,64,0,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,1536,4,0,0.565936009089152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,16,1024,1,0,1.2526933352152507
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,1536,8,0,0.3063466747601827
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,1536,16,0,0.1763040026028951
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,1536,32,0,0.1081813375155131
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,1536,4,0,0.5703680117925009
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,1536,64,0,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,16,1024,1,0,1.255072037378947
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,1536,2,0,1.0996267000834148
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,16,2048,16,0,0.2501973311106364
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,16,2048,32,0,0.1495039959748586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,1536,2,0,1.0921013355255127
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,16,2048,8,0,0.4452693462371826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,16,2048,64,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,16,2048,4,0,0.8482133547465006
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,16,1536,1,0,2.187605381011963
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,16,2048,8,0,0.4456053177515666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,16,2048,4,0,0.854693333307902
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,16,2048,16,0,0.2501973311106364
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,16,2048,32,0,0.15155200163523355
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,32,16,1,0,0.13498666882514954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,16,2048,2,0,1.6590506235758464
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,16,2048,64,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,16,2,0,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,16,8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,16,1536,1,0,2.1568800608317056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,16,4,0,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,16,16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,16,32,0,0.01708799973130226
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,16,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,16,16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,16,8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,16,4,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,16,2,0,0.07409066458543141
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,32,16,1,0,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,16,32,0,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,16,64,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,32,8,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,32,4,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,32,16,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,32,2,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,32,32,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,32,32,1,0,0.14201066891352335
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,32,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,32,4,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,32,16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,32,2,0,0.08123200138409932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,32,8,0,0.031045332551002502
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,32,32,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,32,32,1,0,0.14404267072677612
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,32,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,64,4,0,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,64,8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,64,16,0,0.023567999402681988
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,16,2048,2,0,1.6501866976420085
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,64,32,0,0.017050666113694508
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,64,2,0,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,64,64,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,64,16,0,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,64,4,0,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,64,8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,32,64,1,0,0.17100799083709717
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,64,32,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,64,2,0,0.09353599945704143
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,64,64,0,0.017050666113694508
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,32,64,1,0,0.17084266742070517
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,128,16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,128,8,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,128,32,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,128,64,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,128,4,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,128,2,0,0.11979200442632039
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,128,8,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,128,16,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,128,4,0,0.06995733578999837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,128,64,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,128,32,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,128,2,0,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,32,128,1,0,0.2205066680908203
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,256,16,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,32,128,1,0,0.22050132354100546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,256,8,0,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,256,32,0,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,256,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,256,4,0,0.1269653340180715
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,256,8,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,256,2,0,0.2290346622467041
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,256,4,0,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,256,16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,256,32,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,256,64,0,0.023573334018389385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,16,2048,1,0,3.3054774602254233
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,256,2,0,0.22971733411153158
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,32,256,1,0,0.4398080110549927
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,512,16,0,0.09010666608810425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,512,8,0,0.15204266707102457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,512,64,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,512,32,0,0.057706668972969055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,16,2048,1,0,3.25545597076416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,512,4,0,0.27767467498779297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,32,256,1,0,0.434005339940389
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,512,32,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,512,16,0,0.0897653301556905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,512,64,0,0.03514666606982549
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,512,8,0,0.1534346640110016
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,512,2,0,0.5365866820017496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,512,4,0,0.2783573269844055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,32,1024,16,0,0.19319466749827066
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,512,2,0,0.53111465771993
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,32,1024,32,0,0.1160533328851064
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,32,1024,8,0,0.3503733476003011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,32,1024,64,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,32,1024,4,0,0.6524693171183268
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,32,512,1,0,1.0333813031514485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,32,1024,16,0,0.19421867529551187
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,32,512,1,0,1.0302986303965251
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,32,1024,8,0,0.34458665053049725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,32,1024,32,0,0.11673600474993388
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,32,1024,64,0,0.07785066465536754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,64,16,1,0,0.24542933702468872
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,16,4,0,0.07406400144100189
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,16,2,0,0.13447999954223633
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,16,8,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,16,16,0,0.029370665550231934
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,16,32,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,16,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,32,1024,4,0,0.6550186475118002
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,16,4,0,0.07409066458543141
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,16,8,0,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,32,1024,2,0,1.275055964787801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,16,2,0,0.1353386640548706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,16,16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,16,32,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,16,64,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,64,16,1,0,0.2457546591758728
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,32,16,0,0.03173866619666418
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,32,8,0,0.0470719983180364
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,32,4,0,0.08158400158087413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,32,2,0,0.14268799622853598
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,32,32,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,32,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,64,32,1,0,0.26842667659123737
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,32,8,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,32,2,0,0.14250666896502176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,32,4,0,0.08260799944400787
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,32,64,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,32,32,0,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,32,16,0,0.032431999842325844
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,64,32,1,0,0.26742400725682575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,64,8,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,64,16,0,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,64,4,0,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,64,64,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,64,32,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,32,1024,2,0,1.284272034962972
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,64,2,0,0.1694773236910502
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,64,16,0,0.0365226666132609
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,64,8,0,0.05806399881839752
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,64,32,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,64,4,0,0.0962506632010142
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,64,64,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,64,64,1,0,0.3278506596883138
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,64,2,0,0.16947199900945029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,128,16,0,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,128,8,0,0.07166933516661327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,128,32,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,64,64,1,0,0.3275039990743001
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,128,64,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,128,4,0,0.12219199538230896
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,128,8,0,0.07171200215816498
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,128,2,0,0.2235520084698995
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,128,32,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,128,4,0,0.12219732999801636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,128,16,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,128,64,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,128,2,0,0.22323199113210043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,32,1024,1,0,2.488149325052897
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,64,128,1,0,0.4251146713892619
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,256,16,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,256,32,0,0.05119466781616211
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,256,8,0,0.13260799646377563
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,256,64,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,256,4,0,0.23756800095240274
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,64,128,1,0,0.4261546532313029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,256,16,0,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,256,8,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,256,32,0,0.05051200091838837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,256,2,0,0.4517600138982137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,32,1024,1,0,2.5077813466389975
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,256,64,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,256,4,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,64,512,16,0,0.1628213326136271
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,256,2,0,0.44253333409627277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,64,512,32,0,0.10001066327095032
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,64,512,8,0,0.28757866223653156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,64,512,64,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,64,256,1,0,0.8707413673400879
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,64,512,4,0,0.5393013159434
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,64,256,1,0,0.8608427047729492
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,64,512,16,0,0.16351999839146933
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,64,512,8,0,0.2868959903717041
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,64,512,64,0,0.06723733246326447
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,64,512,32,0,0.09899200002352397
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,64,512,4,0,0.5410399834314982
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,128,16,4,0,0.13499200344085693
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,128,16,1,0,0.47394665082295734
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,128,16,2,0,0.24713067213694254
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,128,16,16,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,128,16,8,0,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,128,16,64,0,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,128,16,32,0,0.030042665700117748
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,128,16,8,0,0.07407466570536296
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,128,16,4,0,0.13500266273816428
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,64,512,2,0,1.0552159945170085
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,128,16,2,0,0.24778666098912558
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,128,16,32,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,128,16,16,0,0.045050665736198425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,128,16,64,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,128,16,1,0,0.47496533393859863
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,128,32,8,0,0.08362133304278056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,128,32,16,0,0.047456001242001854
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,128,32,4,0,0.14421332875887552
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,128,32,64,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,128,32,32,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,128,32,2,0,0.26709334055582684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,128,32,8,0,0.08292800188064575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,128,32,4,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,128,32,16,0,0.048437332113583885
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,128,32,32,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,128,32,1,0,0.5210453271865845
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,128,32,2,0,0.2667520046234131
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,128,32,64,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,64,512,2,0,1.0432853698730469
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,128,64,16,0,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,128,64,8,0,0.09559466441472371
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,128,32,1,0,0.5208746592203776
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,128,64,32,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,128,64,64,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,128,64,4,0,0.17186667521794638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,128,64,8,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,128,64,2,0,0.32307199637095135
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,128,64,4,0,0.1718613306681315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,128,64,16,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,128,64,32,0,0.036517334481080375
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,128,64,64,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,128,64,2,0,0.3265013297398885
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,128,64,1,0,0.6463199853897095
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,128,128,8,0,0.12834133704503378
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,128,128,4,0,0.22801599899927774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,128,128,16,0,0.07645333309968312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,128,64,1,0,0.6446133454640707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,64,512,1,0,2.0507307052612305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,128,128,64,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,128,128,32,0,0.05120533208052317
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,128,128,16,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,128,128,8,0,0.12663466731707254
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,128,128,32,0,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,128,128,2,0,0.4333226680755615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,128,128,64,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,128,128,4,0,0.22801067431767783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,64,512,1,0,2.0500426292419434
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,128,256,16,0,0.14130666851997375
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,128,128,2,0,0.4322933355967204
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,128,256,8,0,0.24542399247487387
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,128,256,32,0,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,128,256,64,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,128,128,1,0,0.8403626283009847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,128,256,4,0,0.45415465037027997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,128,256,16,0,0.14081066846847534
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,128,256,8,0,0.24675732851028442
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,128,128,1,0,0.8393386999766032
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,128,256,32,0,0.08941866954167683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,128,256,64,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,128,256,4,0,0.451749324798584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,256,16,4,0,0.24780799945195517
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,256,16,2,0,0.4780373175938924
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,256,16,16,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,256,16,8,0,0.1358506679534912
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,256,16,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,256,16,32,0,0.0460746685663859
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,128,256,2,0,0.8724479675292969
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,256,16,1,0,0.9501012961069742
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,256,16,8,0,0.13566933075586954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,256,16,16,0,0.07612266639868419
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,256,16,4,0,0.2491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,256,16,32,0,0.046069333950678505
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,256,16,64,0,0.03274133304754893
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,256,16,2,0,0.4756480058034261
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,256,32,8,0,0.14643200238545737
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,256,16,1,0,0.933626651763916
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,256,32,4,0,0.2715359926223755
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,256,32,32,0,0.050479998191197716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,256,32,16,0,0.08499200145403545
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,256,32,64,0,0.03517866631348928
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,128,256,2,0,0.8738133112589518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,256,32,2,0,0.5215573310852051
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,256,32,8,0,0.146096001068751
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,256,32,16,0,0.0846613347530365
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,256,32,4,0,0.2711893320083618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,256,32,32,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,256,32,64,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,256,32,2,0,0.522213339805603
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,256,32,1,0,1.032362699508667
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,256,64,16,0,0.10274666547775269
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,256,64,8,0,0.17765865723292032
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,256,64,32,0,0.06413866579532623
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,256,64,4,0,0.3309226632118225
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,256,64,64,0,0.04199466605981191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,256,32,1,0,1.0275786717732747
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,128,256,1,0,1.7334613800048828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,256,64,8,0,0.17866667111714682
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,256,64,2,0,0.6398293177286783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,256,64,16,0,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,256,64,64,0,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,256,64,32,0,0.06313600142796834
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,256,64,4,0,0.33023999134699505
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,128,256,1,0,1.7191252708435059
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,256,64,2,0,0.6381066640218099
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,16,256,128,8,0,0.23928000529607138
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,4,256,128,32,0,0.08703466256459554
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,2,256,128,64,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,8,256,128,16,0,0.13823999961217245
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,256,64,1,0,1.2822026411692302
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,32,256,128,4,0,0.4401386578877767
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,8,256,128,16,0,0.13858133554458618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,16,256,128,8,0,0.23789334297180176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,256,64,1,0,1.2791519959767659
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,2,256,128,64,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,4,256,128,32,0,0.08703466256459554
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,32,256,128,4,0,0.43912001450856525
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,64,256,128,2,0,0.8540159861246744
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,64,256,128,2,0,0.8523200352986654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,fp8,128,256,128,1,0,1.6682666142781575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_context,default,float16,float16,128,256,128,1,0,1.6676799456278484
