framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,1,0.02645866572856903
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,1,0.02146666745344798
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,1,0.025733334322770435
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,1,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,1,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,1,0.026101333399613697
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,1,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,1,0.025781333446502686
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,1,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,1,0.024885334074497223
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,1,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,1,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,1,0.02491733431816101
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,1,0.024714666108290356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,1,0.02489600082238515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,3,0.02165866643190384
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,3,0.02629333237806956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,1,0.02474133421977361
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,3,0.025642665723959606
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,3,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,3,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,3,0.025807999074459076
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,3,0.025621332228183746
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,3,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,3,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,3,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,3,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,3,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,3,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,3,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,3,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,7,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,3,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,7,0.0264533335963885
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,7,0.022261333962281544
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,7,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,7,0.025610665480295818
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,7,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,7,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,7,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,7,0.02608533451954524
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,7,3.7694241205851235
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,7,0.02588266630967458
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,7,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,7,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,7,0.02480533222357432
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,7,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,7,0.02479466547568639
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,15,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,15,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,15,0.02554133286078771
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,15,0.024821333587169647
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,15,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,15,0.024821333587169647
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,15,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,15,0.024821333587169647
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,15,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,15,0.02475733309984207
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,15,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,15,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,15,0.024501333634058636
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,15,0.02481066683928172
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,31,0.02651199946800868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,15,0.02483733246723811
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,31,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,15,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,31,0.025626666843891144
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,31,0.024800000091393787
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,31,0.02493866781393687
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,31,0.024634666740894318
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,31,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,31,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,31,0.024832000335057575
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,31,0.025968000292778015
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,31,0.02436800052722295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,31,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,31,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,31,0.024234667420387268
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,31,0.02459733436505
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,63,0.024618667860825855
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,63,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,63,0.024469333390394848
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,63,0.0204373337328434
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,31,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,63,0.024117333193620045
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,63,0.02476799984773
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,63,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,63,0.024090667565663654
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,63,0.019813333948453266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,63,0.02439466615517934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,63,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,63,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,63,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,63,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,63,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,63,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,127,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,127,0.02643733223279317
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,127,0.02632533262173335
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,127,0.027973333994547527
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,127,0.022842665513356526
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,127,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,127,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,127,0.022709332406520844
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,127,0.025626666843891144
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,127,0.026917333404223125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,127,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,127,0.025989333788553875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,127,0.026154667139053345
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,127,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,127,0.024746666351954143
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,127,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,255,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,255,0.026373334228992462
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,255,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,255,0.030181333422660828
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,255,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,255,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,255,0.026165333886941273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,255,0.026250667870044708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,255,0.022682666778564453
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,255,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,255,0.026975999275843304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,255,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,255,0.02699200063943863
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,255,0.02588266630967458
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,255,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,255,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,511,0.024314666787783306
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,511,0.02845866729815801
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,511,0.025909334421157837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,511,0.026687999566396076
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,511,0.026949333647886913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,511,0.025957333544890087
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,511,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,511,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,511,0.02788266787926356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,511,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,511,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,511,0.02621866762638092
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,511,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,1023,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,511,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,511,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,511,0.02568000058333079
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,1023,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,1023,0.0269813338915507
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,1023,0.028399998943010967
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,1023,0.027855999767780304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,1023,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,1023,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,1023,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,1023,0.028991999725500744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,1023,0.025578667720158894
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,1023,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,1023,0.027653334041436512
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,1023,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,1023,0.026778665681680042
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,1023,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,1023,0.026863999664783478
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,2047,0.03263466556866964
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,2047,0.02865600089232127
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,2047,0.03121600051720937
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,2047,0.03011200080315272
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,2047,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,2047,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,2047,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,2047,0.030053332448005676
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,2047,0.03253866732120514
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,2047,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,2047,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,2047,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,4095,0.03634133438269297
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,2047,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,2047,0.028538666665554047
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,2047,0.02826666583617528
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,4095,0.03002133220434189
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,4095,0.034714666505654655
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,2047,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,4095,0.03387733300526937
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,4095,0.0340639998515447
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,4095,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,4095,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,4095,0.028959999481836956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,4095,0.03633599976698557
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,4095,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,4095,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,4095,0.03595199932654699
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,4095,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,4095,0.03243733445803324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,4095,0.032469332218170166
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,4095,0.03278400003910065
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,8191,0.03630933413902918
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,8191,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,8191,0.9469119707743326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,8191,0.03618666778008143
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,8191,0.033157333731651306
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,8191,0.03644266724586487
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,8191,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,8191,0.0359199990828832
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,8191,0.03791466603676478
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,8191,0.03626666714747747
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,8191,0.035461333890755974
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,8191,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,8191,0.034847999612490334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,8191,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,8191,0.03489066660404205
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,8191,0.034202667574087776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,16383,0.0378560001651446
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,16383,0.047797332207361855
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,16383,0.044031997521718345
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,16383,0.0444160004456838
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,16383,0.04558399816354116
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,16383,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,16383,0.04636266827583313
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,16383,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,16383,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,16383,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,16383,0.0439573327700297
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,16383,0.04310933252175649
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,16383,0.042352000872294106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,16383,0.04246933261553446
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,16383,0.04236799975236257
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,16383,0.0421973317861557
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,32767,0.0489279975493749
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,32767,0.05686933298905691
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,32767,0.048325334986050926
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,32767,0.04982399940490723
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,32767,0.04754666487375895
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,32767,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,32767,0.04747733473777771
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,32767,0.047370667258898415
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,32767,0.04980266590913137
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,32767,0.043968002001444496
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,32767,0.045642669002215065
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,32767,0.04604266583919525
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,32767,0.04725333551565806
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,32767,0.04520533482233683
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,32767,0.04541333516438802
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,32767,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,65535,0.0641599992911021
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,65535,0.06158400078614553
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,65535,0.06748799979686737
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,65535,0.06233599781990051
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,65535,0.06419733166694641
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,65535,0.06293866535027821
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,65535,0.06177600224812826
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,65535,0.05904533465703329
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,65535,0.05871466795603434
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,65535,0.061706667145093284
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,65535,0.05232533315817515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,65535,0.055344000458717346
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,65535,0.05195199946562449
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,65535,0.05100266635417938
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,65535,0.05138133466243744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,65535,0.05128000179926554
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,131071,0.0926080048084259
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,131071,0.08522133032480876
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,131071,0.08142933249473572
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,131071,0.08004266520341237
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,131071,0.0790773332118988
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,131071,0.07851733267307281
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,131071,0.07947200040022533
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,128,131071,0.0773173322280248
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,131071,0.07567466795444489
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,128,131071,0.06675200164318085
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,131071,0.08267733454704285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,131071,0.0687253326177597
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,131071,0.06715199848016103
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,131071,0.06739733119805653
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,131071,0.06669866542021434
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,131071,0.06786666810512543
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,1,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,1,0.02272533377011617
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,1,0.02624000112215678
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,1,0.025914666553338368
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,1,0.026047999660174053
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,1,0.025861332813898723
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,1,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,1,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,1,0.022645334402720135
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,1,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,1,0.02645866572856903
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,1,0.02590399980545044
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,1,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,1,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,1,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,1,0.02644266684850057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,3,0.022282667458057404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,3,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,3,0.026591998835404713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,3,0.026250667870044708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,3,0.026101333399613697
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,3,0.026357332865397137
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,3,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,3,0.02585600068171819
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,3,0.02242133269707362
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,3,0.026895999908447266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,7,0.022202665607134502
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,3,0.02619733413060506
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,3,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,3,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,3,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,3,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,3,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,7,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,7,0.02603200078010559
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,7,0.026101333399613697
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,7,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,7,0.025653332471847534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,7,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,7,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,7,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,7,0.02276800076166789
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,7,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,7,0.025936000049114227
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,7,0.02497066557407379
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,7,0.02497066557407379
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,7,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,7,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,15,0.021738665799299877
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,15,0.026661333938439686
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,15,0.025770666698614757
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,15,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,15,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,15,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,15,0.024890666206677754
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,15,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,15,0.02625600000222524
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,15,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,15,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,31,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,15,0.024874667326609295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,15,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,15,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,15,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,15,0.024570666253566742
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,31,0.02622933437426885
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,31,0.02476799984773
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,31,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,31,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,31,0.024656000236670177
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,31,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,31,0.02463999887307485
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,31,0.028431999186674755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,31,0.026101333399613697
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,31,0.024533333877722423
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,31,0.024357333779335022
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,31,0.024661332368850708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,31,0.02407466620206833
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,31,0.02439466615517934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,63,0.0262773334980011
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,31,0.023930666347344715
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,63,0.024458666642506916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,63,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,63,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,63,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,63,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,63,0.024170666933059692
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,63,0.023941333095232647
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,63,0.024218666056791942
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,63,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,63,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,63,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,63,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,63,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,63,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,127,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,63,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,127,0.028714666763941448
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,127,0.02677333354949951
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,127,0.025962665677070618
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,127,0.02641066660483678
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,127,0.026309333741664886
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,127,0.025839999318122864
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,127,0.02794666588306427
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,127,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,127,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,127,0.026250667870044708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,127,0.025637333591779072
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,127,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,127,0.025802666942278545
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,255,0.026911998788515728
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,255,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,127,0.024890666206677754
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,255,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,127,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,255,0.028165332973003387
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,255,0.026101333399613697
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,255,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,255,0.026047999660174053
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,255,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,255,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,255,0.0262719988822937
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,255,0.026015999416510265
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,255,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,255,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,255,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,255,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,255,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,511,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,511,0.02809600035349528
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,511,0.028890666862328846
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,511,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,511,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,511,0.026890667776266735
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,511,0.026837334036827087
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,511,0.02658133457104365
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,511,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,511,0.02849599967400233
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,511,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,511,0.026848000784715016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,511,0.026602665583292644
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,511,0.026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,511,0.02629866699377696
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,1023,0.02993600070476532
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,511,0.02641066660483678
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,1023,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,1023,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,1023,0.03205333401759466
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,1023,0.028991999725500744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,1023,0.02897600084543228
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,1023,0.028570666909217834
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,1023,0.028666667640209198
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,1023,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,1023,0.029850666721661884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,1023,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,1023,0.02889599899450938
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,1023,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,1023,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,1023,0.028368001182874043
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,2047,0.03503466645876566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,1023,0.028234665592511494
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,2047,0.02855466554562251
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,2047,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,2047,0.032373333970705666
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,2047,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,2047,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,2047,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,2047,0.031317333380381264
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,2047,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,2047,0.03182400017976761
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,2047,0.032816000282764435
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,2047,0.03435733417669932
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,2047,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,2047,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,2047,0.030928000807762146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,2047,0.03088533381621043
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,4095,0.0322026660044988
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,4095,0.035045333206653595
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,4095,0.03422933320204417
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,4095,0.036901332437992096
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,4095,0.03416533271471659
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,4095,0.03402666747570038
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,4095,0.033957332372665405
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,4095,0.0348693331082662
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,4095,0.03445333242416382
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,4095,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,4095,0.03583999971548716
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,4095,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,4095,0.03277866790692011
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,4095,0.03294399877389272
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,4095,0.03311466674009959
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,4095,0.03270400067170461
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,8191,0.036559998989105225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,8191,0.042549331982930504
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,8191,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,8191,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,8191,0.03894400099913279
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,8191,0.03829866647720337
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,8191,0.038560000558694206
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,8191,0.035904000202814736
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,8191,0.03801066676775614
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,8191,0.04077333211898804
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,8191,1.1379786332448323
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,8191,0.03870933254559835
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,8191,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,8191,0.03643733263015747
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,8191,0.036474667489528656
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,16383,0.049039999643961586
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,8191,0.03691199918588003
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,16383,0.053317333261171974
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,16383,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,16383,0.0488373339176178
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,16383,0.045370668172836304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,16383,0.04365866879622141
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,16383,0.044495999813079834
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,16383,0.044106667240460716
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,16383,0.041989331444104515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,16383,0.04806933303674062
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,16383,0.047210668524106346
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,16383,0.042591998974482216
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,16383,0.04714666803677877
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,16383,0.04241600135962168
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,16383,0.0418453315893809
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,16383,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,32767,0.06203199923038483
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,32767,0.060175999999046326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,32767,0.06520000100135803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,32767,0.059018666545550026
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,32767,0.05942399799823761
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,32767,0.05831466615200043
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,32767,0.058229332168896995
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,32767,0.0561706672112147
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,32767,0.05592533449331919
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,32767,0.058143998185793556
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,32767,0.05160533388455709
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,32767,0.04990399877230326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,32767,0.04780266682306925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,32767,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,32767,0.04727466901143392
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,65535,0.08970666925112407
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,32767,0.04799466828505198
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,65535,0.08227733274300893
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,65535,0.07670933504899342
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,65535,0.07839466631412506
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,65535,0.07563733557860057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,65535,0.0751146674156189
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,65535,0.07489599784215291
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,65535,0.07720533510049184
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,65535,0.06905599931875865
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,65535,0.08056533336639404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,65535,0.06674133241176605
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,65535,0.06579733391602834
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,131071,0.14542933305104574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,65535,0.06820799907048543
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,65535,0.06381333371003468
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,65535,0.06436266501744588
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,65535,0.06280000011126201
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,131071,0.1163146694501241
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,131071,0.11191466450691223
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,131071,0.10940800110499065
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,131071,0.10897066195805867
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,131071,0.10770666599273682
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,128,131071,0.1090133289496104
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,131071,0.13100266456604004
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,131071,0.10945066809654236
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,131071,0.09388800462086995
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,131071,0.09039466579755147
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,131071,0.08936533331871033
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,131071,0.08927999933560689
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,131071,0.08793600400288899
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,1,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,131071,0.08842133482297261
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,1,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,128,131071,0.08871466914812724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,1,0.025759999950726826
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,1,0.026634665826956432
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,1,0.027002667387326557
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,1,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,1,0.02585600068171819
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,1,0.026021334032217663
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,1,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,1,0.026021334032217663
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,1,0.026650667190551758
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,1,0.028218666712443035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,1,0.02609066665172577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,3,0.026714667677879333
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,1,0.025818665822347004
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,1,0.03309333324432373
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,3,0.027952000498771667
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,3,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,3,0.02643733223279317
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,1,0.026047999660174053
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,3,0.026101333399613697
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,3,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,3,0.02611733227968216
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,3,0.026074667771657307
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,3,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,3,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,3,0.026362667481104534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,3,0.02613866577545802
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,3,0.025839999318122864
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,3,0.025637333591779072
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,3,0.025685332715511322
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,3,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,7,0.026911998788515728
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,7,0.023925334215164185
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,7,0.02672533442576726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,7,0.025861332813898723
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,7,0.025775998830795288
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,7,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,7,0.025637333591779072
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,7,0.025802666942278545
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,7,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,7,0.022202665607134502
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,7,0.02604266752799352
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,7,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,7,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,7,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,7,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,7,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,15,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,15,0.022272000710169475
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,15,0.02624533325433731
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,15,0.025701334079106648
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,15,0.026000000536441803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,15,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,15,0.025573333104451496
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,15,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,15,0.025727999707063038
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,15,0.022266666094462078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,15,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,15,0.024618667860825855
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,15,0.025573333104451496
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,15,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,15,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,31,0.025957333544890087
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,31,0.026672000686327618
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,15,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,31,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,31,0.9511573314666748
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,31,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,31,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,31,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,31,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,31,0.021941334009170532
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,31,0.026485333840052288
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,31,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,31,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,31,0.024671999116738636
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,31,0.02443733314673106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,31,0.024192000428835552
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,63,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,31,0.024608001112937927
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,63,0.026047999660174053
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,63,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,63,1.062117338180542
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,63,0.024282666544119518
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,63,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,63,0.02446399877468745
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,63,0.024559999505678814
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,63,0.02067199970285098
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,63,0.024138666689395905
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,63,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,63,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,63,0.023962666591008503
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,63,0.023557332654794056
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,63,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,63,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,127,0.028597332537174225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,127,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,127,0.024874667326609295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,127,0.026309333741664886
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,127,0.02628266563018163
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,127,0.02699200063943863
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,127,0.026021334032217663
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,127,0.8835466702779134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,127,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,127,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,127,0.02701333413521449
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,127,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,127,0.02640533447265625
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,127,0.02593066543340683
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,127,0.026202666262785595
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,255,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,127,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,255,0.030447999636332195
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,255,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,255,0.028389332195123036
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,255,0.026746665438016255
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,255,0.02698666602373123
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,255,0.026394667724768322
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,255,0.026821332673231762
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,255,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,255,0.028330666323502857
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,255,0.02632533262173335
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,255,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,255,0.026191999514897663
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,255,0.026309333741664886
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,255,0.02624000112215678
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,255,0.026005332668622334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,511,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,511,0.028357334434986115
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,511,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,511,0.028346667687098186
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,511,0.02906133234500885
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,511,0.028410665690898895
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,511,0.031114667654037476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,511,0.028325334191322327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,511,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,511,0.03221333275238673
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,511,0.028234665592511494
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,511,0.028864001234372456
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,511,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,511,0.03748266647259394
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,511,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,511,0.027802666028340656
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,1023,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,1023,0.028954667349656422
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,1023,0.03183466692765554
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,1023,0.03254399945338567
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,1023,0.030928000807762146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,1023,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,1023,0.03086400032043457
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,1023,0.030928000807762146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,1023,0.028213332096735638
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,1023,0.030671998858451843
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,1023,0.03253333270549774
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,1023,0.03050133337577184
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,1023,0.030261332790056866
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,1023,0.03049066662788391
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,1023,0.03067733347415924
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,2047,0.031114667654037476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,2047,0.03608000030120214
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,1023,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,2047,0.035205334424972534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,2047,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,2047,0.03342933456103007
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,2047,0.032698666055997215
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,2047,0.031850665807724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,2047,0.03271999955177307
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,2047,0.032746667663256325
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,2047,0.03577066709597906
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,2047,0.03266666581233343
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,2047,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,2047,0.032111999889214836
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,2047,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,2047,0.032144000132878624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,2047,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,4095,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,4095,0.0367253323396047
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,4095,0.03806400050719579
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,4095,0.03815466662247976
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,4095,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,4095,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,4095,0.03719466676314672
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,4095,0.03685333331425985
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,4095,0.03745600084463755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,4095,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,4095,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,4095,0.036202666660149894
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,4095,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,4095,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,4095,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,4095,0.0359253336985906
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,8191,0.04774933556715647
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,8191,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,8191,0.05049600203831991
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,8191,0.0422026664018631
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,8191,0.04436799883842468
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,8191,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,8191,0.04348266621430715
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,8191,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,8191,0.04330666859944662
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,8191,0.04345066845417023
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,8191,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,8191,0.04035733391841253
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,8191,0.04038933416207632
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,8191,0.04037333279848099
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,8191,0.04031999905904134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,16383,0.061530664563179016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,16383,0.056943997740745544
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,16383,0.06554666658242543
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,8191,0.03992533435424169
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,16383,0.05853333572546641
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,16383,0.056517332792282104
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,16383,0.05675200124581655
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,16383,0.056549335519472756
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,16383,0.05453333258628845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,16383,0.05615466833114624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,16383,0.056688000758488975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,16383,0.049551998575528465
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,16383,0.047093331813812256
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,16383,0.04677333434422811
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,16383,0.04642133414745331
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,16383,0.04628799855709076
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,16383,0.046256000796953835
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,32767,0.0892639954884847
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,32767,0.076773335536321
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,32767,0.08083199958006541
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,32767,0.07412266731262207
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,32767,0.07409599920113881
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,32767,0.07389333347479503
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,32767,0.07335466643174489
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,32767,0.06155199805895487
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,32767,0.07387733459472656
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,32767,0.08266133566697438
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,32767,0.06712000072002411
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,32767,0.06371200084686279
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,32767,0.06298133234182994
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,32767,0.06302399933338165
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,32767,0.06158933540185293
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,32767,0.0618399977684021
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,65535,0.14499200383822122
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,65535,0.1144533356030782
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,65535,0.10985066493352254
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,65535,0.1067626674969991
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,65535,0.10959999759991963
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,65535,0.10730666915575664
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,65535,0.10685867071151733
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,65535,0.1051573355992635
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,65535,0.09248000383377075
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,65535,0.13077867031097412
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,65535,0.08763733506202698
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,65535,0.08720533053080241
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,65535,0.08853866656621297
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,65535,0.08727999528249104
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,65535,0.08699733018875122
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,65535,0.08805867036183675
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,131071,0.2539413372675578
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,131071,0.17398399114608765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,131071,0.1841813325881958
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,131071,0.175818661848704
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,131071,0.17444799343744913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,131071,0.1784586707750956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,131071,0.17453332742055258
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,128,131071,0.17406932512919107
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,131071,0.22573866446812949
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,131071,0.13781866431236267
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,131071,0.14098133643468222
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,131071,0.13552533586819968
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,131071,0.1363040010134379
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,131071,0.13673067092895508
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,131071,0.13834666212399802
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,128,131071,0.1356000006198883
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,1,0.028378665447235107
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,1,0.024720000723997753
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,1,0.026000000536441803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,1,0.026911998788515728
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,1,0.03513066718975703
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,1,0.026874666412671406
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,1,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,1,0.026378666361172993
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,1,0.023658665517965954
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,1,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,1,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,1,0.028181334336598713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,1,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,1,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,1,0.026517334083716076
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,1,0.026447998980681103
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,3,0.02810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,3,0.02399466683467229
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,3,0.02685333291689555
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,3,0.0262719988822937
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,3,0.02405333270629247
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,3,0.02661866694688797
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,3,0.026565333207448322
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,3,0.02645866572856903
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,3,0.026288000245889027
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,3,0.027855999767780304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,3,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,3,0.02587199956178665
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,3,0.026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,7,0.024175999065240223
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,3,0.02604266752799352
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,3,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,3,0.025850666066010792
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,7,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,7,0.02629333237806956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,7,0.028346667687098186
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,7,0.02587733417749405
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,7,0.026026666164398193
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,7,0.025850666066010792
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,7,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,7,0.02605333427588145
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,7,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,7,0.028005334238211315
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,7,0.026629333694775898
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,7,0.025802666942278545
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,7,0.025962665677070618
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,7,0.026170666019121807
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,15,0.027749332288901012
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,7,0.025946666797002155
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,15,0.026677332818508148
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,15,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,15,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,15,0.026704000929991405
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,15,0.02608533451954524
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,15,0.026133333643277485
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,15,0.02606400102376938
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,15,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,15,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,15,0.026560001075267792
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,15,0.026165333886941273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,15,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,15,0.026015999416510265
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,15,0.02589866767326991
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,31,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,15,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,31,0.022656001150608063
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,31,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,31,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,31,0.02587199956178665
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,31,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,31,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,31,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,31,0.022255999346574146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,31,0.026863999664783478
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,31,0.025861332813898723
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,31,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,31,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,31,0.024682665864626568
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,31,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,31,0.024549332757790882
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,63,0.022602667411168415
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,63,0.02628266563018163
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,63,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,63,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,63,0.02402133246262868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,63,0.024517332514127094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,63,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,63,0.024117333193620045
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,63,0.025605333348115284
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,63,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,63,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,63,0.023930666347344715
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,63,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,63,0.024197332561016083
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,63,0.023669332265853882
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,63,0.023904000719388325
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,127,0.02794133375088374
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,127,0.025834667185942333
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,127,0.03001066545645396
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,127,0.027845333019892376
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,127,0.02769600103298823
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,127,0.028607999285062153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,127,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,127,0.027845333019892376
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,127,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,127,0.028207999964555103
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,127,0.026975999275843304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,127,0.028970666229724884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,127,0.026949333647886913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,255,0.02762666592995326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,127,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,127,0.026474667092164356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,127,0.026533332963784535
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,255,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,255,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,255,0.02979733298222224
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,255,0.028325334191322327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,255,0.02940266579389572
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,255,0.028042666614055634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,255,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,255,0.026928000152111053
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,255,0.031114667654037476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,255,0.028586665789286297
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,255,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,255,0.02770666778087616
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,255,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,255,0.027610667049884796
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,255,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,511,0.02826666583617528
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,511,0.03275733441114426
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,511,0.03068266560633977
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,511,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,511,0.0345920001467069
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,511,0.03033066789309184
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,511,0.03047466774781545
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,511,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,511,0.029120000700155895
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,511,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,511,0.03679466744263967
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,511,0.03102933367093404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,511,0.0301706666747729
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,1023,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,511,0.02997333308060964
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,1023,0.03225066761175791
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,511,0.030063999195893604
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,1023,0.03430933256944021
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,511,0.030192000170548756
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,1023,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,1023,0.034101332227389015
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,1023,0.0330826664964358
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,1023,0.03292266776164373
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,1023,0.03186133255561193
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,1023,0.033071999748547874
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,1023,0.03572266548871994
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,1023,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,1023,0.032314665615558624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,1023,0.031983998914559685
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,1023,0.03189333279927572
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,1023,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,1023,0.03177600105603536
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,2047,0.03992533435424169
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,2047,0.03893866638342539
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,2047,0.037632000943024956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,2047,0.03700799991687139
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,2047,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,2047,0.03745600084463755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,2047,0.037061333656311035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,2047,0.036730666955312095
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,2047,0.03585066646337509
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,2047,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,2047,0.03875733415285746
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,2047,0.03678400069475174
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,2047,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,2047,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,2047,0.03497066597143809
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,2047,0.03527999917666117
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,4095,0.04764266808827718
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,4095,0.05097599824269613
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,4095,0.0440533310174942
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,4095,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,4095,0.044581333796183266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,4095,0.042863999803860985
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,4095,0.043151999513308205
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,4095,0.04311466713746389
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,4095,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,4095,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,4095,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,4095,0.03990400085846583
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,4095,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,4095,0.0391146664818128
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,4095,0.03902933249870936
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,4095,0.039077334105968475
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,8191,0.06339199841022491
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,8191,0.0625439981619517
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,8191,0.05727999905745188
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,8191,0.0589279979467392
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,8191,0.056090667843818665
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,8191,0.05083199838797251
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,8191,0.055311997731526695
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,8191,0.055733333031336464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,8191,0.056458666920661926
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,8191,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,8191,0.054757331808408104
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,8191,0.048432002464930214
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,8191,0.04748799900213877
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,8191,0.046309332052866616
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,8191,0.047685335079828896
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,8191,0.046096002062161766
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,16383,0.08078399797280629
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,16383,0.0893440047899882
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,16383,0.07498666644096375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,16383,0.07378666599591573
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,16383,0.07381333410739899
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,16383,0.07257066667079926
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,16383,0.07227733234564464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,16383,0.0728959987560908
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,16383,0.06781333188215892
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,16383,0.08029866715272267
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,16383,0.06154133379459381
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,16383,0.06338133414586385
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,16383,0.0621013343334198
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,16383,0.060693333546320595
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,16383,0.06043733159701029
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,16383,0.060677334666252136
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,32767,0.10505599776903789
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,32767,0.10638933380444844
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,32767,0.1458346645037333
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,32767,0.10822400450706482
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,32767,0.11377599835395813
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,32767,0.1090133289496104
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,32767,0.13005866607030234
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,32767,0.10656533638636272
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,32767,0.1065066655476888
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,32767,0.09278399745623271
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,32767,0.08747200171152751
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,32767,0.08814932902654012
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,32767,0.08669867118199666
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,32767,0.08693333466847737
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,32767,0.08633599678675334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,32767,0.08637332916259766
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,65535,0.25331199169158936
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,65535,0.17985065778096518
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,65535,0.18606932957967123
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,65535,0.17702933152516684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,65535,0.17866132656733194
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,65535,0.17754133542378744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,65535,0.17499200503031412
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,65535,0.2250773310661316
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,65535,0.1758613387743632
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,65535,0.13513599832852682
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,65535,0.13946666320165
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,65535,0.14122666915257773
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,65535,0.13537066181500754
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,65535,0.13538666566212973
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,65535,0.13683199882507324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,131071,0.31666133801142377
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,65535,0.13515200217564902
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,131071,0.47121067841847736
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,131071,0.30852800607681274
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,131071,0.3081706762313843
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,131071,0.3050346573193868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,131071,0.3059893250465393
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,131071,0.310698668162028
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,128,131071,0.3053706685702006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,131071,0.41733332475026447
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,131071,0.23552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,128,131071,0.23081066211064658
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,131071,0.23433599869410196
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,131071,0.23778132597605386
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,131071,0.23332800467809042
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,131071,0.23265065749486288
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,1,0.02638400097688039
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,131071,0.2325920065244039
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,1,0.029834667841593426
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,1,0.02827200045188268
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,1,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,1,0.028394666810830433
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,1,0.027834666272004444
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,1,0.027866666515668232
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,1,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,1,0.02664533257484436
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,1,0.03036266565322876
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,1,0.028805332879225414
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,1,0.028058665494124096
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,1,0.027893332143624622
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,1,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,1,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,3,0.026517334083716076
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,3,0.028714666763941448
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,1,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,3,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,3,0.028005334238211315
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,3,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,3,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,3,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,3,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,3,0.02998399982849757
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,3,0.025941332181294758
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,3,0.028245332340399425
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,3,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,3,0.027914665639400482
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,3,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,7,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,7,0.0258240004380544
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,3,0.026821332673231762
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,7,0.02884799987077713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,3,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,7,0.028890666862328846
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,7,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,7,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,7,0.026330667237440746
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,7,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,7,0.02683199942111969
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,7,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,7,0.028832000990708668
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,7,0.027647999425729115
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,7,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,7,0.026762666801611584
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,15,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,15,0.027914665639400482
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,7,0.026714667677879333
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,7,0.027664000789324444
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,15,0.029834667841593426
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,15,0.027589333554108936
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,15,0.026928000152111053
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,15,0.026975999275843304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,15,0.02664000044266383
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,15,0.02977599948644638
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,15,0.026362667481104534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,15,0.026858667532602947
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,15,0.026719999810059864
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,15,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,15,0.026975999275843304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,15,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,15,0.026447998980681103
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,15,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,31,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,31,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,31,0.026848000784715016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,31,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,31,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,31,0.026362667481104534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,31,0.026000000536441803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,31,0.028005334238211315
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,31,0.026869334280490875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,31,0.024656000236670177
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,31,0.0284853329261144
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,31,0.02661866694688797
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,31,0.025839999318122864
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,31,0.026842666169007618
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,31,0.02569599946339925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,31,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,63,0.025685332715511322
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,63,0.024549332757790882
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,63,0.027808000644048054
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,63,0.026015999416510265
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,63,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,63,0.029306667546431225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,63,0.025920001169045765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,63,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,63,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,63,0.028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,63,0.02649066597223282
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,63,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,63,0.024911999702453613
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,63,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,63,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,63,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,127,0.02624533325433731
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,127,0.0322826678554217
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,127,0.0306986669699351
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,127,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,127,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,127,0.029578665892283123
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,127,0.028938665986061096
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,127,0.028389332195123036
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,127,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,127,0.03219733387231827
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,127,0.029648000995318096
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,127,0.028602667152881622
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,127,0.028042666614055634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,127,0.02867199977238973
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,127,0.028949332733949024
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,127,0.027973333994547527
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,255,0.03396799912055334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,255,0.032645332316557564
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,255,0.032672000428040825
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,255,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,255,0.03070933371782303
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,255,0.031002665559450786
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,255,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,255,0.03084266682465871
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,255,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,255,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,255,0.031167998909950256
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,255,0.047210668524106346
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,255,0.030282666285832722
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,255,0.030202666918436687
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,255,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,255,0.02980799973011017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,511,0.03538133452335993
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,511,0.03702933341264725
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,511,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,511,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,511,0.033002667129039764
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,511,0.03409066547950109
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,511,0.03283733377854029
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,511,0.03277866790692011
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,511,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,511,0.036277333895365395
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,511,0.03365333378314972
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,511,0.03243733445803324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,511,0.03224000086386999
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,511,0.032314665615558624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,511,0.031680000325044
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,511,0.031930667658646904
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,1023,0.039877332746982574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,1023,0.036943999429543815
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,1023,0.04190933207670847
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,1023,0.038805333276589714
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,1023,0.03783999880154928
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,1023,0.038165333370367684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,1023,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,1023,0.03745600084463755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,1023,0.03823466598987579
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,1023,0.03945599993069967
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,1023,0.03607466568549474
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,1023,0.03810133288304011
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,1023,0.03602133442958196
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,1023,0.03555733213822047
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,1023,0.0358240008354187
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,1023,0.03585066646337509
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,2047,0.05176533261934916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,2047,0.050623998045921326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,2047,0.045066664616266884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,2047,0.04292800029118856
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,2047,0.04409599800904592
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,2047,0.04223999877770742
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,2047,0.04432533184687296
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,2047,0.042352000872294106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,2047,0.04613866905371348
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,2047,0.04507733384768168
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,2047,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,2047,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,2047,0.03988266736268997
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,2047,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,2047,0.039919999738534294
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,2047,0.040448000033696495
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,4095,0.06408533453941345
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,4095,0.06391466657320659
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,4095,0.059487998485565186
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,4095,0.057477335135142006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,4095,0.05849599838256836
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,4095,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,4095,0.056688000758488975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,4095,0.05570666491985321
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,4095,0.05535466472307841
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,4095,0.05566399792830149
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,4095,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,4095,0.04905066887537638
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,4095,0.04703466594219208
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,4095,0.047082667549451195
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,4095,0.047210668524106346
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,4095,0.04587733248869578
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,8191,0.09338666995366414
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,8191,0.07694933315118153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,8191,0.08275199929873149
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,8191,0.07522666454315186
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,8191,0.07896533111731212
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,8191,0.07356800138950348
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,8191,0.07334400216738383
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,8191,0.07294933497905731
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,8191,0.06943466762701671
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,8191,0.08711466193199158
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,8191,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,8191,0.0649599979321162
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,8191,0.06563733518123627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,16383,0.1476959983507792
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,8191,0.06251200040181477
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,8191,0.063509335120519
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,16383,0.11289067069689433
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,8191,0.06192533175150553
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,16383,0.1104159951210022
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,16383,0.10804266730944316
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,16383,0.10686399539311726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,16383,0.10898666580518086
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,16383,0.10540800293286641
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,16383,0.10544000069300334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,16383,0.13287466764450073
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,16383,0.09252799550692241
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,16383,0.0888320008913676
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,16383,0.08789867162704468
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,16383,0.08740267157554626
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,16383,0.08584533135096233
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,32767,0.18121600151062012
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,16383,0.08724799752235413
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,32767,0.2589120070139567
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,16383,0.08614400029182434
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,32767,0.18703466653823853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,32767,0.17654399077097574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,32767,0.17862399419148764
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,32767,0.1759200096130371
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,32767,0.17498133579889932
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,32767,0.23118933041890463
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,32767,0.14221333463986716
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,32767,0.1747573415438334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,32767,0.137472003698349
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,32767,0.13612799843152365
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,32767,0.13499200344085693
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,32767,0.13594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,32767,0.1344533363978068
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,32767,0.13576533397038779
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,65535,0.47577067216237384
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,65535,0.3225333293279012
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,65535,0.31169599294662476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,65535,0.3113226691881816
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,65535,0.30901867151260376
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,65535,0.30713599920272827
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,65535,0.3108266592025757
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,65535,0.30662933985392254
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,65535,0.2387626568476359
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,65535,0.23237866163253784
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,65535,0.23243733247121176
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,65535,0.42081598440806073
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,65535,0.23883734146753946
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,65535,0.23080533742904663
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,65535,0.2327786684036255
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,65535,0.23149865865707397
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,131071,0.5771946509679159
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,131071,0.5834240118662516
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,131071,0.574842651685079
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,131071,0.5763306617736816
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,131071,0.5728960037231445
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,131071,0.5739466746648153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,131071,0.9103413422902426
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,128,131071,0.5699253479639689
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,131071,0.8027679920196533
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,131071,0.43030401070912677
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,1,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,131071,0.42474134763081867
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,131071,0.42588265736897785
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,131071,0.42400534947713214
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,131071,0.4205919901529948
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,128,131071,0.4230240186055501
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,131071,0.4233280022939046
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,1,0.03278400003910065
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,1,0.030949334303538006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,1,0.029781334102153778
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,1,0.029711998999118805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,1,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,1,0.028933333853880566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,1,0.028837333122889202
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,1,0.032655999064445496
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,1,0.03002133220434189
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,1,0.03046933313210805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,1,0.02979733298222224
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,3,0.030218665798505146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,1,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,1,0.029792000850041706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,1,0.03025600065787633
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,3,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,3,0.032501332461833954
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,1,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,3,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,3,0.029951999584833782
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,3,0.029743999242782593
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,3,0.02980799973011017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,3,0.029738667110602062
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,3,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,3,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,3,0.030832000076770782
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,3,0.032501332461833954
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,3,0.03009066730737686
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,3,0.029792000850041706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,3,0.02924266705910365
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,7,0.03218133250872294
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,3,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,7,0.02864533414443334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,7,0.029722665747006733
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,7,0.030847998956839245
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,7,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,7,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,7,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,7,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,7,0.029088000456492107
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,7,0.03234666585922241
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,7,0.03038399914900462
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,7,0.030591999491055805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,7,0.029792000850041706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,7,0.029648000995318096
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,7,0.029578665892283123
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,15,0.028757333755493164
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,7,0.029850666721661884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,15,0.03200000027815501
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,15,0.03027733415365219
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,15,0.029813334345817566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,15,0.03473600000143051
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,15,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,15,0.028773332635561626
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,15,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,15,0.03197866678237915
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,15,0.03014400104681651
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,15,0.029850666721661884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,15,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,15,0.030213333666324615
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,15,0.029663999875386555
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,15,0.029077333708604176
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,31,0.02788266787926356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,15,0.02906133234500885
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,31,0.02956266701221466
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,31,0.028746667007605236
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,31,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,31,0.028666667640209198
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,31,0.028373333315054577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,31,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,31,0.028751999139785767
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,31,0.028773332635561626
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,31,0.031119999786218006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,31,0.028768000503381092
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,31,0.028666667640209198
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,31,0.027808000644048054
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,31,0.028016000986099243
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,31,0.027829334139823914
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,31,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,63,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,63,0.028064000109831493
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,63,0.03090133269627889
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,63,0.027978666126728058
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,63,0.02773333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,63,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,63,0.027765333652496338
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,63,0.02773866554101308
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,63,0.030965333183606465
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,63,0.028165332973003387
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,63,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,63,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,63,0.0286613330245018
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,63,0.026816000541051228
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,63,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,63,0.02701333413521449
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,127,0.031162666777769726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,127,0.02974933385848999
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,127,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,127,0.03009066730737686
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,127,0.029893333713213604
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,127,0.030458666384220123
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,127,0.02961066613594691
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,127,0.029909332593282063
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,127,0.02977066735426585
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,127,0.032032000521818794
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,127,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,127,0.02997333308060964
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,127,0.028714666763941448
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,127,0.028597332537174225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,127,0.02940266579389572
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,127,0.02868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,255,0.03270400067170461
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,255,0.04244266450405121
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,255,0.03615466753641764
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,255,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,255,0.03289599965016047
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,255,0.036602665980656944
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,255,0.03569599986076355
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,255,0.035829332967599235
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,255,0.035717333356539406
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,255,0.03920533259709676
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,255,0.035002666215101876
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,255,0.03426666557788849
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,255,0.034501334031422935
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,255,0.035749333600203194
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,255,0.03451200077931086
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,255,0.0339626669883728
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,511,0.04665066798528036
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,511,0.038949333131313324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,511,0.043061330914497375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,511,0.037989333271980286
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,511,0.03775466730197271
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,511,0.03845866769552231
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,511,0.03815466662247976
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,511,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,511,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,511,0.04470400015513102
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,511,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,511,0.036687999963760376
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,1023,0.05710933109124502
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,511,0.036015999813874565
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,511,0.03611200054486593
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,511,0.036117332677046456
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,511,0.03579200059175491
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,1023,0.053247998158137
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,1023,0.046469335754712425
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,1023,0.043696001172065735
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,1023,0.04544533292452494
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,1023,0.0439626673857371
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,1023,0.04359466830889384
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,1023,0.047600001096725464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,1023,0.05677333474159241
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,1023,0.04320000112056732
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,1023,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,1023,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,1023,0.04399466514587402
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,2047,0.06499733527501424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,1023,0.041135999063650765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,1023,0.04101333270470301
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,2047,0.07027733325958252
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,1023,0.041322665909926094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,2047,0.0598880002895991
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,2047,0.0588266650835673
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,2047,0.057328000664711
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,2047,0.058042665322621666
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,2047,0.05671466886997223
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,2047,0.05574933191140493
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,2047,0.058592001597086586
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,2047,0.06436799963315327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,2047,0.04969066878159841
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,2047,0.05148266752560934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,2047,0.047466665506362915
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,2047,0.0473333348830541
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,2047,0.04736533264319102
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,4095,0.07851199805736542
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,4095,0.07529599964618683
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,4095,0.09884267052014668
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,2047,0.04737600187460581
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,4095,0.07704533139864604
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,4095,0.0842186709245046
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,4095,0.07525333265463512
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,4095,0.08993599812189738
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,4095,0.07369600236415863
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,4095,0.07400000095367432
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,4095,0.06656533479690552
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,4095,0.07150933146476746
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,4095,0.2286720077196757
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,4095,0.06533333162466685
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,4095,0.0632533331712087
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,4095,0.0646666685740153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,4095,0.06281066437562306
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,8191,0.15639467040697733
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,8191,0.10939733187357585
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,8191,0.1127946674823761
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,8191,0.1190720001856486
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,8191,0.11165866255760193
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,8191,0.1090719997882843
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,8191,0.10905599594116211
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,8191,0.1128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,8191,0.13715733091036478
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,8191,0.09665600458780925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,8191,0.08923199772834778
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,8191,0.0918293297290802
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,8191,0.08851733803749084
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,8191,0.08884800473848979
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,8191,0.08842133482297261
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,8191,0.0883733332157135
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,16383,0.26446400086085003
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,16383,0.18503999710083008
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,16383,0.175327996412913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,16383,0.17797333002090454
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,16383,0.17802133162816366
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,16383,0.17537067333857217
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,16383,0.17537067333857217
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,16383,0.17485866943995157
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,16383,0.13913066188494363
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,16383,0.2352693279584249
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,16383,0.1443946659564972
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,16383,0.13667200009028116
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,16383,0.13583999872207642
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,16383,0.13769066333770752
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,16383,0.1365333298842112
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,16383,0.13657599687576294
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,32767,0.4818933407465617
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,32767,0.3245813250541687
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,32767,0.3165066639582316
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,32767,0.31043734153111774
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,32767,0.31143466631571454
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,32767,0.30796267588933307
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,32767,0.30826665957768756
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,32767,0.4270240068435669
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,32767,0.3073973258336385
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,32767,0.24269866943359375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,32767,0.2360960046450297
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,32767,0.23466666539510092
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,32767,0.23248000939687094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,32767,0.23801066478093466
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,32767,0.23222933212916055
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,32767,0.2350026567776998
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,65535,0.917744000752767
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,65535,0.5919413169225057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,65535,0.5744853417078654
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,65535,0.5720266501108805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,65535,0.5814346472422282
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,65535,0.5778293212254842
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,65535,0.5733333428700765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,65535,0.5711359977722168
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,65535,0.8092906475067139
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,65535,0.42763733863830566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,65535,0.4329386552174886
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,65535,0.4221493403116862
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,65535,0.42798932393391925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,65535,0.4260266621907552
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,65535,0.4264959891637166
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,65535,0.4254080057144165
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,131071,1.7878665924072266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,131071,1.1108799775441487
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,131071,1.1200533707936604
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,131071,1.1039040088653564
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,131071,1.1037279764811199
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,131071,1.099776029586792
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,128,131071,1.1006773312886555
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,131071,1.10316801071167
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,131071,1.576101303100586
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,131071,0.812602678934733
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,1,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,131071,0.8119040330251058
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,1,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,131071,0.8192266623179117
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,131071,0.8153386910756429
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,131071,0.8152693112691244
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,128,131071,0.8099733193715414
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,131071,0.803925355275472
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,1,0.03480533262093862
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,1,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,1,0.033957332372665405
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,1,0.033189333975315094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,1,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,1,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,1,0.03882133215665817
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,1,0.0386613334218661
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,1,0.035717333356539406
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,1,0.034128000338872276
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,1,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,3,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,1,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,3,0.03594133257865906
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,1,0.03267733256022135
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,1,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,3,0.03783999880154928
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,3,0.03385066737731298
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,3,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,3,0.03299733251333237
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,3,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,3,0.038922667503356934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,3,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,3,0.03298133363326391
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,3,0.03828266759713491
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,3,0.0340639998515447
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,3,0.033743999898433685
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,3,0.03307733436425527
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,3,0.03294399877389272
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,3,0.03316266586383184
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,7,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,7,0.033914667864640556
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,7,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,7,0.03461866577466329
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,7,0.033039999504884086
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,7,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,7,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,7,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,7,0.03894400099913279
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,7,0.03295466552178065
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,7,0.038575999438762665
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,7,0.035258665680885315
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,7,0.03463999927043915
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,7,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,7,0.03316800047953924
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,7,0.03299733251333237
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,15,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,15,0.03707200040419897
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,15,0.0330826664964358
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,15,0.03472000112136205
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,15,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,15,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,15,0.03219733387231827
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,15,0.03260799994071325
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,15,0.03846933444341024
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,15,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,15,0.03808533400297165
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,15,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,15,0.03522133330504099
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,15,0.03309333324432373
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,15,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,15,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,31,0.03708266715208689
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,31,2.7574453353881836
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,31,0.03451200077931086
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,31,0.03246400008598963
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,31,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,31,0.032325332363446556
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,31,0.03226666649182638
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,31,0.032325332363446556
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,31,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,31,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,31,0.03422933320204417
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,31,0.03268799930810928
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,31,0.03196800003449122
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,31,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,31,0.031957333286603294
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,31,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,63,0.034154665966828666
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,63,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,63,0.037018666664759316
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,63,0.03182400017976761
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,63,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,63,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,63,0.03161599983771642
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,63,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,63,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,63,0.036837334434191384
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,63,0.03180266668399175
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,63,0.036917333801587425
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,63,0.03091199944416682
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,63,0.030693332354227703
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,63,0.030746666093667347
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,63,0.03297599901755651
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,127,0.03673599908749262
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,127,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,127,0.035743998984495796
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,127,0.038959999879201256
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,127,0.03473600000143051
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,127,0.03381866713364919
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,127,0.03342933456103007
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,127,0.03806400050719579
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,127,0.036176001032193504
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,127,0.038560000558694206
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,127,0.03201066702604294
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,127,0.03473600000143051
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,127,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,127,0.03268266717592875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,127,0.032655999064445496
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,255,0.04057066639264425
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,127,0.0321066677570343
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,255,0.04267199834187826
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,255,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,255,0.03828799972931544
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,255,0.03763733307520548
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,255,0.03719999889532725
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,255,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,255,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,255,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,255,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,255,0.037952000896135964
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,255,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,255,0.03667200108369192
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,255,0.03585600107908249
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,255,0.03533866753180822
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,255,0.03533333291610082
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,511,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,511,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,511,0.053370664517084755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,511,0.04836266736189524
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,511,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,511,0.04841066896915436
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,511,0.0469813346862793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,511,0.047877331574757896
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,511,0.05612266560395559
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,511,0.04737600187460581
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,511,0.04911466439565023
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,511,0.04444799820582072
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,511,0.04413333535194397
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,511,0.04506133496761322
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,511,0.0439626673857371
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,1023,0.07382399837176006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,511,0.044213334719340004
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,1023,0.06589866677920024
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,1023,0.06619200110435486
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,1023,0.061306665341059365
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,1023,0.060789331793785095
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,1023,0.06253866851329803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,1023,0.061109334230422974
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,1023,0.06102933486302694
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,1023,0.0642080008983612
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,1023,0.059749335050582886
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,1023,0.0670773337284724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,1023,0.05204799771308899
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,1023,0.054698665936787925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,1023,0.05229333539803823
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,1023,0.05234666665395101
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,1023,0.05276800195376078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,2047,0.0925546685854594
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,2047,0.09418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,2047,0.08318933347860973
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,2047,0.08064533273379008
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,2047,0.08079466720422109
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,2047,0.0782239983479182
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,2047,0.07990399996439616
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,2047,0.07811200122038524
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,2047,0.08707732955614726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,2047,0.07863999903202057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,2047,0.07135466734568278
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,2047,0.06790933509667714
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,2047,0.06693866848945618
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,2047,0.0670826683441798
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,2047,0.06604266663392384
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,4095,0.1193386713663737
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,2047,0.06636266907056172
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,4095,0.15040533741315207
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,4095,0.12729066610336304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,4095,0.11549333731333415
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,4095,0.11502933502197266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,4095,0.11331733067830403
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,4095,0.11593066652615865
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,4095,0.11173333724339803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,4095,0.14032000303268433
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,4095,0.10362133383750916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,4095,0.09619733691215515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,4095,0.09622400005658467
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,4095,0.09231467048327129
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,4095,0.0925546685854594
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,4095,0.09150399764378865
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,4095,0.0909546713034312
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,8191,0.2621493339538574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,8191,0.19104532400767008
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,8191,0.18330132961273193
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,8191,0.179802676041921
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,8191,0.18412800629933676
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,8191,0.17907732725143433
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,8191,0.17938133080800375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,8191,0.14029332995414734
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,8191,0.1439786652723948
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,8191,0.17805866400400797
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,8191,0.13924266894658408
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,8191,0.14012266198794046
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,8191,0.1513759990533193
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,8191,0.23638933897018433
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,8191,0.14050666491190592
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,8191,0.13922666509946188
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,16383,0.4954880078633626
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,16383,0.32543466488520306
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,16383,0.33876800537109375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,16383,0.31294933954874676
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,16383,0.3149706721305847
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,16383,0.3134133418401082
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,16383,0.3184853394826253
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,16383,0.3120959997177124
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,16383,0.44117867946624756
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,16383,0.24223466714223227
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,16383,0.23707199096679688
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,16383,0.24899200598398843
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,16383,0.2373653252919515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,16383,0.23867199818293253
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,16383,0.23760000864664713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,16383,0.23757867018381754
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,32767,0.9299733638763428
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,32767,0.6065973440806071
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,32767,0.591269334157308
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,32767,0.5822133223215739
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,32767,0.580623984336853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,32767,0.579034686088562
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,32767,0.5757173299789429
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,32767,0.58297065893809
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,32767,0.8155679702758789
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,32767,0.4419146776199341
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,32767,0.43434667587280273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,32767,0.43122665087382
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,32767,0.4289439916610718
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,32767,0.4285120169321696
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,32767,0.42583465576171875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,65535,1.1183466911315918
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,32767,0.42907734711964923
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,65535,1.134709358215332
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,65535,1.7859946886698406
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,65535,1.114517370859782
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,65535,1.586890697479248
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,65535,1.1074346701304119
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,65535,1.102122704188029
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,65535,1.1072853406270344
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,65535,1.1040159861246746
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,65535,0.8117760022481283
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,65535,0.8216586907704672
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,65535,0.8160746892293295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,65535,0.8136959870656332
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,65535,0.8131146430969238
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,65535,0.8111519813537598
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,65535,0.8172319730122884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,131071,2.1881866455078125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,131071,2.168954690297445
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,131071,3.5559679667154946
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,131071,2.156709353129069
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,131071,2.162933349609375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,131071,2.1633920669555664
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,131071,2.1551040013631186
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,128,131071,2.15336004892985
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,131071,3.0952320098876953
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,131071,1.5796480178833008
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,1,0.05203733344872793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,131071,1.582058588663737
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,131071,1.5829226175944011
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,131071,1.5822827021280925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,1,0.047338664531707764
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,131071,1.5812160174051921
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,131071,1.5823787053426106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,128,131071,1.5804853439331055
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,1,0.04297066728274027
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,1,0.04125333329041799
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,1,0.04082666585842768
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,1,0.04020266731580099
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,1,0.039936001102129616
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,1,0.061280002196629844
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,1,0.04005333284536997
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,1,0.050144001841545105
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,1,0.04422933359940847
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,1,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,1,0.04040000090996424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,1,0.04035199930270513
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,1,0.0399893323580424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,1,0.03952533255020777
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,3,0.0524533341328303
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,3,0.04706666866938273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,3,0.044010668992996216
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,3,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,3,0.0407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,3,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,3,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,3,0.03988266736268997
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,3,0.04982399940490723
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,3,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,3,0.04420266548792521
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,3,0.04131199916203817
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,3,0.040693332751592
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,3,0.039861333866914116
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,3,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,3,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,7,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,7,0.046800002455711365
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,7,0.04326933125654856
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,7,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,7,0.039808000127474465
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,7,0.040522667268911995
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,7,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,7,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,7,0.061109334230422974
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,7,0.05017599960168203
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,7,0.044682666659355164
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,7,0.040463998913764954
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,7,0.04199466605981191
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,7,0.04015466570854187
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,7,0.04009066770474116
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,7,0.039642666776975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,15,0.05138133466243744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,15,0.040474665661652885
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,15,0.04677333434422811
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,15,0.04246933261553446
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,15,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,15,0.039850667119026184
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,15,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,15,0.0499839981396993
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,15,0.03908800085385641
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,15,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,15,0.044266665975252785
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,15,0.04026666780312856
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,15,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,15,0.03995199998219808
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,15,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,15,0.043280000487963356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,31,0.050111999114354454
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,31,0.046015997727712
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,31,0.04233066737651825
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,31,0.03932266682386398
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,31,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,31,0.04031999905904134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,31,0.03875199953715006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,31,0.039061332742373146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,31,0.04342933495839437
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,31,0.05940799911816915
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,31,0.04857600231965383
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,31,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,31,0.038378665844599404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,31,0.04033066580692927
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,31,0.038133333126703896
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,31,0.03860799968242645
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,63,0.05320533116658529
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,63,0.03880000114440918
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,63,0.0460746685663859
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,63,0.041722665230433144
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,63,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,63,0.038560000558694206
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,63,0.03769599894682566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,63,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,63,0.047872001926104225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,63,0.058778668443361916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,63,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,63,0.039247999588648476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,63,0.04229333500067393
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,63,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,63,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,63,0.037392000357309975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,127,0.0595360000928243
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,127,0.04450133442878723
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,127,0.04859200119972229
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,127,0.041135999063650765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,127,0.04099733382463455
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,127,0.042447999119758606
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,127,0.040778666734695435
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,127,0.040933333337306976
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,127,0.06228800117969513
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,127,0.049914668003718056
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,127,0.04030933231115341
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,127,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,127,0.043237333496411644
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,127,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,127,0.03952533255020777
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,127,0.038959999879201256
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,255,0.06709333260854085
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,255,0.044639999667803444
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,255,0.05750933289527893
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,255,0.04645333190759023
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,255,0.050106664498647056
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,255,0.045109331607818604
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,255,0.04491733511288961
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,255,0.0443146675825119
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,255,0.06899733344713847
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,255,0.05465066432952881
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,255,0.04308799902598063
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,255,0.046682665745417275
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,255,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,255,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,255,0.04222933451334635
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,255,0.042261332273483276
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,511,0.0799786647160848
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,511,0.06756266454855601
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,511,0.060975998640060425
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,511,0.0590826670328776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,511,0.058975999553998314
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,511,0.06268799801667531
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,511,0.06053866446018219
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,511,0.05958933134873708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,511,0.08122666676839192
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,511,0.06330666442712148
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,511,0.05517866710821787
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,511,0.05142400165398916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,511,0.04920533299446106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,511,0.04811733464399973
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,1023,0.10829333464304607
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,511,0.052245333790779114
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,511,0.04814399778842926
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,1023,0.08578133583068848
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,1023,0.07593599955240886
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,1023,0.07671999931335449
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,1023,0.0784746656815211
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,1023,0.08216000099976857
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,1023,0.07733333110809326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,1023,0.07745066781838734
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,1023,0.0767680009206136
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,1023,0.10637332995732625
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,1023,0.0694400022427241
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,1023,0.06647466619809468
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,1023,0.06502933303515117
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,1023,0.06434666613737743
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,1023,0.06557866434256236
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,1023,0.06432533264160156
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,2047,0.16421332955360413
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,2047,0.12000000476837158
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,2047,0.1141866644223531
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,2047,0.11277332901954651
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,2047,0.11238933602968852
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,2047,0.10924266775449117
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,2047,0.10962133606274922
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,2047,0.11125866572062175
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,2047,0.15388799707094827
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,2047,0.1013813316822052
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,2047,0.09876799583435059
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,2047,0.091648002465566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,2047,0.09015466769536336
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,2047,0.09071466326713562
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,2047,0.08964266379674275
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,4095,0.2775306701660156
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,4095,0.18797866503397623
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,2047,0.08957333366076152
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,4095,0.1825066606203715
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,4095,0.1805866758028666
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,4095,0.1783199906349182
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,4095,0.17788267135620117
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,4095,0.254746675491333
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,4095,0.17987199624379477
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,4095,0.15013866623242697
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,4095,0.17745065689086914
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,4095,0.14313066999117532
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,4095,0.14102400342623392
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,4095,0.13954666256904602
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,4095,0.13896000385284424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,4095,0.13925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,8191,0.31414933999379474
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,4095,0.13936533530553183
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,8191,0.5009760061899821
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,8191,0.3111039996147156
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,8191,0.3199839989344279
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,8191,0.3115839958190918
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,8191,0.3092479904492696
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,8191,0.309173325697581
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,8191,0.24714134136835733
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,8191,0.31095999479293823
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,8191,0.45216532548268634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,8191,0.24197866519292197
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,8191,0.2396906614303589
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,8191,0.24033600091934204
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,8191,0.23780266443888345
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,8191,0.2367146611213684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,8191,0.23773332436879477
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,16383,0.6270933151245117
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,16383,0.5855679909388224
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,16383,0.9410186608632406
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,16383,0.602453351020813
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,16383,0.586026668548584
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,16383,0.6086133321126302
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,16383,0.580298662185669
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,16383,0.5771146615346273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,16383,0.8434293270111084
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,16383,0.45763198534647626
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,16383,0.43562666575113934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,16383,0.44511465231577557
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,16383,0.434554656346639
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,16383,0.4341919819513957
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,16383,0.43405866622924805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,16383,0.43359466393788654
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,32767,1.1636426448822021
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,32767,1.8111626307169597
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,32767,1.141535997390747
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,32767,1.121951977411906
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,32767,1.1030453046162922
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,32767,1.1097866694132488
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,32767,1.117461363474528
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,32767,1.1091893513997395
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,32767,1.6029119491577148
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,32767,0.8396373589833578
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,32767,0.8166240056355795
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,32767,0.8104480107625326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,32767,0.8264586925506592
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,32767,0.8205599784851074
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,32767,0.8171679973602295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,32767,0.8172799746195475
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,65535,3.549685478210449
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,65535,2.219653288523356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,65535,2.189919948577881
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,65535,2.174725373586019
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,65535,2.1666879653930664
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,65535,2.1686293284098306
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,65535,2.1566665967305503
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,65535,2.15718936920166
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,65535,1.6050346692403157
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,65535,3.1341066360473633
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,65535,1.5822505950927734
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,65535,1.5684693654378254
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,65535,1.586938699086507
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,65535,1.579045295715332
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,65535,1.58078400293986
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,65535,1.5880427360534668
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,131071,4.31768004099528
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,131071,4.288138707478841
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,131071,7.138250350952148
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,131071,4.273765246073405
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,131071,4.265109380086263
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,128,131071,4.273599942525228
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,131071,4.256997426350911
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,131071,4.2581227620442705
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,131071,3.1144533157348633
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,131071,6.1905867258707685
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,131071,3.123978614807129
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,1,0.08917333682378133
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,1,0.05855466425418854
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,131071,3.095989227294922
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,1,0.07067200044790904
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,1,0.055125330885251365
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,131071,3.1163412729899087
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,1,0.05389333268006643
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,131071,3.1172428131103516
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,131071,3.111695925394694
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,1,0.05276800195376078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,128,131071,3.1121066411336265
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,1,0.052655999859174095
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,1,0.05259199937184652
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,1,0.06111466884613037
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,1,0.10527466734250386
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,1,0.07545066873232524
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,1,0.05379199981689453
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,1,0.05323199927806854
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,1,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,1,0.0525439977645874
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,1,0.052485331892967224
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,3,0.06621333460013072
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,3,0.08888000249862671
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,3,0.05898666878541311
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,3,0.05522133409976959
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,3,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,3,0.05259199937184652
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,3,0.05228800078233083
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,3,0.052442664901415505
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,3,0.1049066682656606
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,3,0.0757173349459966
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,3,0.060778667529424034
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,3,0.05340266724427541
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,3,0.05586666862169901
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,3,0.052341332038243614
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,3,0.05199466645717621
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,7,0.08899733424186707
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,7,0.0668639987707138
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,3,0.05186133086681366
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,7,0.05807999769846598
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,7,0.05474133292833964
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,7,0.053717335065205894
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,7,0.05256533126036326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,7,0.07520533104737599
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,7,0.05226666728655497
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,7,0.05573866764704386
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,7,0.10427733262379964
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,7,0.06165866553783417
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,7,0.0529120018084844
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,7,0.056799997886021934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,7,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,7,0.05272000034650167
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,15,0.08903466661771138
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,15,0.05793599784374237
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,7,0.05211733281612396
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,15,0.05146133402983347
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,15,0.05403199791908264
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,15,0.06577066580454509
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,15,0.052895997961362205
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,15,0.0514933317899704
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,15,0.051488002141316734
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,15,0.1032533347606659
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,15,0.05156800150871277
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,15,0.060746664802233376
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,15,0.07482133309046428
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,15,0.05348266661167145
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,15,0.05551466842492422
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,15,0.05237866441408793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,15,0.05229333539803823
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,31,0.06774933139483134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,31,0.08878399928410848
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,31,0.053455998500188194
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,31,0.05150933563709259
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,31,0.056789333621660866
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,31,0.05037866532802582
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,31,0.05073066552480062
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,31,0.10035733381907146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,31,0.07333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,31,0.05046399931112925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,31,0.053616002202034
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,31,0.05101866523424784
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,31,0.05867200096448263
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,31,0.049957334995269775
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,31,0.04965866605440775
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,31,0.04946133494377136
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,63,0.09059199690818787
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,63,0.07074666519959767
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,63,0.04903466502825419
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,63,0.056426664193471275
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,63,0.05228800078233083
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,63,0.04939733445644379
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,63,0.05012266834576925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,63,0.04950400193532308
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,63,0.10181333621342976
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,63,0.05693866809209188
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,63,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,63,0.05123733480771383
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,63,0.0470773329337438
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,63,0.048885335524876915
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,63,0.047093331813812256
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,63,0.04683200021584829
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,127,0.09823466340700786
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,127,0.07694399853547414
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,127,0.06714666883150737
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,127,0.055813332398732506
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,127,0.0554613322019577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,127,0.10988799730936687
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,127,0.07762133578459422
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,127,0.05364799996217092
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,127,0.05913066864013672
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,127,0.0536106675863266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,127,0.0614879975716273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,127,0.05378133555253347
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,127,0.05126399795214335
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,127,0.05075199902057648
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,127,0.05187733471393585
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,127,0.050517335534095764
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,255,0.08619200189908345
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,255,0.11227200428644817
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,255,0.07702399790287018
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,255,0.07202133536338806
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,255,0.06929600238800049
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,255,0.06942933301130931
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,255,0.06742933392524719
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,255,0.06764266888300578
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,255,0.11808533469835918
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,255,0.08397866288820903
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,255,0.06338666876157124
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,255,0.05709866682688395
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,255,0.07089066505432129
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,255,0.059861332178115845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,255,0.05693333347638448
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,255,0.05646933118502299
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,511,0.10486400127410889
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,511,0.13870400190353394
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,511,0.09454400340716045
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,511,0.09026133020718892
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,511,0.08841066559155782
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,511,0.08524266878763835
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,511,0.1425440013408661
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,511,0.08530132969220479
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,511,0.08570667107899983
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,511,0.09594666957855225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,511,0.08341866731643677
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,511,0.07760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,511,0.07320533196131389
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,511,0.07517866790294647
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,511,0.07271466652552287
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,511,0.07315200070540111
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,1023,0.1976426641146342
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,1023,0.1400320033232371
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,1023,0.12850133577982584
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,1023,0.1250933309396108
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,1023,0.12004799644152324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,1023,0.12107200423876445
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,1023,0.12107200423876445
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,1023,0.19311465819676718
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,1023,0.119077334801356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,1023,0.1193386713663737
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,1023,0.10859733819961548
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,1023,0.1029866635799408
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,1023,0.10018666585286458
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,2047,0.19636799891789755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,1023,0.09771733482678731
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,1023,0.09839466214179993
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,2047,0.2056480050086975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,1023,0.09805867075920105
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,2047,0.3080906669298808
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,2047,0.18940800428390503
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,2047,0.19197332859039307
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,2047,0.1880906621615092
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,2047,0.18691200017929077
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,2047,0.1874613364537557
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,2047,0.2879413366317749
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,2047,0.17073599497477213
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,2047,0.15169066190719604
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,2047,0.157642662525177
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,2047,0.15001066525777182
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,2047,0.14819199840227762
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,2047,0.14778133233388266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,4095,0.3396799961725871
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,2047,0.14664533734321594
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,4095,0.5266186793645223
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,4095,0.33026132980982464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,4095,0.32542399565378827
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,4095,0.3343840042750041
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,4095,0.3199626604715983
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,4095,0.4788373311360677
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,4095,0.3200906713803609
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,4095,0.26968000332514447
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,4095,0.3208746711413066
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,4095,0.2504640022913615
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,4095,0.2491733431816101
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,4095,0.2476159930229187
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,4095,0.25706666707992554
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,4095,0.24846933285395303
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,4095,0.2472320000330607
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,8191,0.961845318476359
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,8191,0.6462133328119913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,8191,0.5969173510869344
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,8191,0.62117866675059
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,8191,0.6075199842453003
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,8191,0.5922400156656901
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,8191,0.5896426836649576
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,8191,0.5917813380559286
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,8191,0.8689013322194418
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,8191,0.45505066712697345
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,8191,0.45138665040334064
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,8191,0.4723786513010661
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,8191,0.4437493483225505
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,8191,0.4418720006942749
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,8191,0.4389813343683879
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,8191,0.4434986511866252
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,16383,1.8337225914001465
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,16383,1.1857439676920574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,16383,1.1525226434071858
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,16383,1.1242667039235432
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,16383,1.1339999834696453
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,16383,1.1180533568064372
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,16383,1.1222453117370605
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,16383,1.1176640192667644
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,16383,0.8568320274353027
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,16383,1.6333333651224773
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,16383,0.835536003112793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,16383,0.8192533651987711
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,16383,0.8248586654663086
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,16383,0.8321866989135742
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,16383,0.826474666595459
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,16383,0.8255679607391357
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,32767,2.2468320528666177
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,32767,2.206986745198568
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,32767,3.5723787943522134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,32767,2.188474655151367
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,32767,2.184021313985189
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,32767,2.17466672261556
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,32767,2.168517271677653
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,32767,2.166986624399821
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,32767,3.1622772216796875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,32767,1.6113120714823406
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,32767,1.603834629058838
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,32767,1.596570650736491
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,32767,1.5920480092366536
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,32767,1.5913972854614258
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,32767,1.5914079348246257
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,32767,1.5800533294677734
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,65535,4.34881591796875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,65535,4.32694943745931
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,65535,7.09552001953125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,65535,4.267829259236653
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,65535,4.263962745666504
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,128,65535,4.261808077494304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,65535,4.291152000427246
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,65535,4.278848012288411
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,65535,3.1570558547973633
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,65535,3.1309706370035806
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,65535,6.17420768737793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,65535,3.126490592956543
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,65535,3.1248534520467124
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,65535,3.1242507298787436
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,1,0.15709333618481955
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,65535,3.1120532353719077
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,1,0.11020800471305847
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,128,65535,3.121130625406901
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,1,0.08342933654785156
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,1,0.09379200140635173
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,1,0.0798933357000351
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,1,0.08120533327261607
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,1,0.0775733341773351
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,1,0.07894399762153625
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,1,0.19093332688013712
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,1,0.12556800246238708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,1,0.09934932986895244
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,1,0.08521067102750142
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,1,0.0805866668621699
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,1,0.07852266728878021
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,1,0.07789333164691925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,1,0.07717866698900859
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,3,0.15438933173815408
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,3,0.08340266346931458
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,3,0.09495466947555542
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,3,0.11256532867749532
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,3,0.07877866427103679
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,3,0.08028266827265422
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,3,0.07852800190448761
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,3,0.07769600053628285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,3,0.1250986655553182
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,3,0.18796799580256143
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,3,0.08448533217112224
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,3,0.08021333316961925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,3,0.09832533200581868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,3,0.0775733341773351
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,3,0.07713066538174947
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,7,0.0824533353249232
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,7,0.15318399667739868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,3,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,7,0.1090826690196991
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,7,0.0795360008875529
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,7,0.0905013382434845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,7,0.07784533500671387
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,7,0.07680533329645793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,7,0.18599466482798258
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,7,0.07735466460386912
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,7,0.09840533137321472
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,7,0.12564266721407572
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,7,0.07855999966462453
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,7,0.08090666433175404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,7,0.08545600374539693
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,7,0.07779199878374736
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,7,0.07727999985218048
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,15,0.07597333192825317
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,15,0.10776000221570332
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,15,0.08916266759236653
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,15,0.07542400062084198
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,15,0.15223466356595358
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,15,0.08098666866620381
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,15,0.07770666480064392
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,15,0.07548800110816956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,15,0.18403732776641846
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,15,0.12293866276741028
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,15,0.09796266754468282
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,15,0.08385066191355388
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,15,0.07924800117810567
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,15,0.07744533320267995
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,15,0.07657599945863088
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,15,0.07637866834799449
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,31,0.1088853379090627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,31,0.1514133314291636
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,31,0.08975467085838318
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,31,0.07967466612656911
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,31,0.0738613357146581
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,31,0.0745600014925003
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,31,0.07565866907437642
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,31,0.07362666726112366
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,31,0.12019200126330058
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,31,0.17771732807159424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,31,0.09325866897900899
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,31,0.0792746643225352
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,31,0.07388799885908763
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,31,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,31,0.07115200161933899
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,63,0.1551253298918406
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,31,0.07113066812356313
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,63,0.11238400141398112
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,63,0.07028799752394359
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,63,0.0726453314224879
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,63,0.07686399916807811
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,63,0.08451199531555176
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,63,0.17855467398961386
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,63,0.07081600030263265
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,63,0.09425066908200581
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,63,0.12190399567286174
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,63,0.09354666868845622
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,127,0.1216319998105367
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,63,0.07710400223731995
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,63,0.0676693320274353
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,63,0.0691840002934138
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,127,0.10136000315348308
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,63,0.06684266527493794
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,63,0.06643199920654297
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,127,0.17098132769266763
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,127,0.09356266260147095
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,127,0.09121599793434143
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,127,0.08851200342178345
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,127,0.08569600184758504
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,127,0.19049600760142008
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,127,0.08662399649620056
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,127,0.08910399675369263
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,127,0.1304266651471456
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,127,0.09971200426419576
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,127,0.07281599938869476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,127,0.07701866825421651
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,127,0.08072533210118611
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,127,0.07336000104745229
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,255,0.19799999396006265
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,255,0.1393013298511505
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,255,0.1232319970925649
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,255,0.11060266693433125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,255,0.10656533638636272
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,255,0.10500799616177876
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,255,0.10426132877667744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,255,0.10196266571680705
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,255,0.21342933177947998
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,255,0.14201066891352335
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,255,0.11251733700434367
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,255,0.09402133027712505
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,255,0.09949333469072978
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,255,0.09185600280761719
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,255,0.09011200070381165
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,511,0.16484799981117249
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,255,0.09000533819198608
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,511,0.17746132612228394
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,511,0.15387733777364096
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,511,0.2559093236923218
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,511,0.1394613285859426
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,511,0.1426346699396769
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,511,0.13742933670679727
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,511,0.2635680039723714
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,511,0.13929067055384317
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,511,0.13730133573214212
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,511,0.16361600160598755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,511,0.12456533312797546
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,511,0.11849066615104675
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,511,0.11682132879892985
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,511,0.11529067158699036
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,511,0.11568533380826314
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,1023,0.36419200897216797
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,1023,0.20826667547225952
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,1023,0.24643733104070029
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,1023,0.22292266289393106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,1023,0.2055306633313497
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,1023,0.20638400316238403
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,1023,0.2140373388926188
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,1023,0.20466667413711548
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,1023,0.36076800028483075
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,1023,0.16828266779581705
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,1023,0.16505066553751627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,1023,0.18544532855351767
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,1023,0.2132586638132731
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,1023,0.16540799538294473
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,1023,0.1663146714369456
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,1023,0.17669334014256796
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,2047,0.5827146768569946
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,2047,0.3776533206303914
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,2047,0.34720532099405926
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,2047,0.3566720088322957
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,2047,0.34266666571299237
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,2047,0.33790401617685956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,2047,0.3407946825027466
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,2047,0.34091734886169434
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,2047,0.5483680168787638
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,2047,0.3120693365732829
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,2047,0.2736426591873169
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,2047,0.2837653358777364
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,2047,0.26797332366307575
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,2047,0.26612265904744464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,2047,0.26500799258550006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,2047,0.2640639940897624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,4095,1.0132479667663574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,4095,0.6769920190175375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,4095,0.6480426788330078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,4095,0.6140480041503906
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,4095,0.6291840076446533
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,4095,0.6068480014801025
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,4095,0.604202667872111
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,4095,0.602565328280131
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,4095,0.5076800187428793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,4095,0.928821325302124
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,4095,0.4795786539713542
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,4095,0.4586506684621175
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,4095,0.4686400095621745
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,4095,0.4652693271636963
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,4095,0.4580746491750081
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,4095,0.45950933297475177
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,8191,1.225279966990153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,8191,1.8708693186442058
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,8191,1.1862080097198486
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,8191,1.1474133332570393
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,8191,1.159765323003133
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,8191,1.1392640272776287
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,8191,1.1343306700388591
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,8191,1.1312959988911946
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,8191,1.6945813496907551
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,8191,0.8897120157877604
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,8191,0.861845334370931
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,8191,0.8549439907073975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,8191,0.8433067003885905
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,8191,0.8453386624654134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,8191,0.8503093719482422
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,8191,0.8401813507080078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,16383,2.2406133015950522
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,16383,2.2857279777526855
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,16383,2.2140960693359375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,16383,3.632447878519694
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,16383,2.2058614095052085
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,16383,2.184650739034017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,16383,2.1816693941752114
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,16383,2.190234661102295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,16383,3.205434799194336
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,16383,1.6527093251546223
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,16383,1.615909258524577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,16383,1.6285920143127441
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,16383,1.6104747454325359
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,16383,1.6091200510660808
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,16383,1.606592019399007
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,16383,1.614415963490804
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,32767,4.4007307688395185
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,32767,4.349552154541016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,32767,7.113402684529622
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,32767,4.318954785664876
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,32767,4.304485321044922
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,32767,4.2863359451293945
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,32767,4.281594594319661
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,128,32767,4.300581296284993
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,32767,3.16757869720459
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,32767,3.1357653935750327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,32767,6.28216552734375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,32767,3.145461400349935
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,32767,3.1477972666422525
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,32767,3.1415414810180664
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,32767,3.140783945719401
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,128,32767,3.1425065994262695
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,1,0.28993066151936847
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,1,0.18735466400782266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,1,0.16051733493804932
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,1,0.14089600245157877
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,1,0.13378666838010153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,1,0.13377599914868674
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,1,0.12959999839464822
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,1,0.12980266412099203
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,1,0.22058133284250894
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,1,0.35607465108235675
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,1,0.17419199148813883
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,1,0.14683199922243753
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,1,0.13449600338935852
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,3,0.18471999963124594
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,3,0.28364266951878864
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,1,0.12993066509564719
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,3,0.15802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,1,0.12869333227475485
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,1,0.12718933820724487
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,3,0.14016000429789224
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,3,0.13197333614031473
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,3,0.12795199950536093
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,3,0.35572266578674316
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,3,0.12934933106104532
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,3,0.12757333119710287
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,3,0.14780267079671225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,3,0.17272533973058066
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,3,0.13409066200256348
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,3,0.22009599208831787
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,3,0.13345600167910257
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,3,0.12770133217175803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,3,0.12687466541926065
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,7,0.2823839982350667
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,7,0.18525334199269614
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,7,0.1532319982846578
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,7,0.13864533106486002
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,7,0.12805333733558655
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,7,0.12665067116419473
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,7,0.13034666577974954
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,7,0.1257866621017456
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,7,0.21875733137130737
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,7,0.3510773181915283
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,7,0.17242133617401123
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,7,0.14622400204340616
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,7,0.132560004790624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,7,0.13074666261672974
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,7,0.12905599673589072
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,7,0.12703466415405273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,15,0.1834719975789388
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,15,0.15067733327547708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,15,0.2807413339614868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,15,0.13702399532000223
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,15,0.12797333796819052
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,15,0.12552533547083536
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,15,0.12437867124875386
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,15,0.12339733044306438
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,15,0.126202662785848
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,15,0.21588265895843506
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,15,0.3453119993209839
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,15,0.1695893406867981
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,15,0.14411200086275736
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,15,0.1313973367214203
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,15,0.12556800246238708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,15,0.12463999787966411
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,31,0.18280533949534097
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,31,0.27615465720494586
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,31,0.1499733328819275
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,31,0.12595199545224509
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,31,0.12036800384521484
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,31,0.12337600191434224
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,31,0.1337386667728424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,31,0.11940266688664754
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,31,0.3325120011965434
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,31,0.13523733615875244
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,31,0.1166986624399821
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,31,0.1220853328704834
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,31,0.160671999057134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,31,0.21017066637674967
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,31,0.11473600069681804
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,31,0.11377066373825073
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,63,0.2799520095189412
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,63,0.18650666872660318
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,63,0.151829332113266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,63,0.1344533363978068
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,63,0.12201600273450215
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,63,0.12144000331560771
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,63,0.12591466307640076
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,63,0.12030399839083354
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,63,0.3334346612294515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,63,0.21261332432428995
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,63,0.15933866302172342
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,63,0.1309706668059031
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,63,0.1083679993947347
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,63,0.11131200194358826
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,63,0.10569600264231364
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,63,0.11849066615104675
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,127,0.20608532428741455
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,127,0.31201066573460895
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,127,0.16758400201797485
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,127,0.3566720088322957
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,127,0.15161599715550741
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,127,0.13922666509946188
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,127,0.1356106698513031
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,127,0.13778666655222574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,127,0.14453867077827454
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,127,0.22394132614135742
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,127,0.17122133572896323
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,127,0.13301866253217062
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,127,0.14408000310262045
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,127,0.12504000465075174
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,255,0.2016106645266215
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,255,0.17838933070500693
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,255,0.36556267738342285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,127,0.12467199563980103
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,127,0.12405332922935486
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,255,0.18593599398930868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,255,0.23946134249369302
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,255,0.17269867658615112
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,255,0.17152533928553262
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,255,0.17115734020868936
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,255,0.194922665754954
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,255,0.24785600105921426
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,255,0.4008800188700358
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,255,0.15988799929618835
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,255,0.15185599525769553
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,255,0.16890132427215576
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,255,0.14855999747912088
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,255,0.14933866262435913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,511,0.31404266754786175
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,511,0.2824266751607259
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,511,0.47357332706451416
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,511,0.2472053368886312
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,511,0.23946134249369302
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,511,0.2560639977455139
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,511,0.2424586613972982
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,511,0.23676800727844238
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,511,0.24540799856185913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,511,0.19899199406305948
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,511,0.21841599543889365
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,511,0.20057600736618042
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,511,0.20658665895462036
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,511,0.2967733343442281
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,511,0.49436267217000324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,511,0.19936533768971762
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,1023,0.6832426389058431
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,1023,0.4535573323567708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,1023,0.38094401359558105
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,1023,0.37706132729848224
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,1023,0.3908960024515788
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,1023,0.41036800543467206
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,1023,0.37274666627248126
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,1023,0.3710293372472127
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,1023,0.6751306851704916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,1023,0.39716800053914386
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,1023,0.34279465675354004
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,1023,0.29977067311604816
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,1023,0.3065013289451599
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,1023,0.3173440098762512
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,1023,0.2977653344472249
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,1023,0.2996319929758708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,2047,0.7404853502909342
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,2047,1.1204426288604736
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,2047,0.6962347030639648
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,2047,0.668725331624349
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,2047,0.6388586759567261
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,2047,0.6518880128860474
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,2047,0.6335893472035726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,2047,1.0592640240987141
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,2047,0.6327146689097086
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,2047,0.5137386719385783
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,2047,0.589679996172587
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,2047,0.5388213396072388
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,2047,0.501200000445048
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,2047,0.4961920181910197
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,2047,0.49159467220306396
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,2047,0.49218666553497314
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,4095,1.9941439628601074
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,4095,1.3040053049723308
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,4095,1.245029369990031
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,4095,1.2053120136260986
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,4095,1.1855200131734211
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,4095,1.169317324956258
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,4095,1.1607306798299153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,4095,1.165002663930257
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,4095,1.8290880521138508
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,4095,0.8751306533813477
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,4095,0.9694613615671793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,4095,0.9205066363016764
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,4095,0.8792213598887125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,4095,0.8809173107147217
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,4095,0.8955893516540527
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,4095,0.8772586981455485
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,8191,3.7326186498006186
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,8191,2.3870347340901694
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,8191,2.3176426887512207
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,8191,2.2681760787963867
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,8191,2.212026596069336
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,8191,2.2230453491210938
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,8191,2.2065866788228354
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,8191,2.242021401723226
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,8191,3.36628786722819
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,8191,1.7351466814676921
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,8191,1.6751413345336914
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,8191,1.6494666735331218
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,8191,1.6465226809183757
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,8191,1.6386613845825195
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,8191,1.6408054033915203
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,8191,1.6400052706400554
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,16383,7.218485514322917
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,16383,4.516357421875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,16383,4.428688049316406
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,16383,4.381418546040853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,16383,4.325423876444499
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,16383,4.331487973531087
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,16383,4.342330614725749
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,128,16383,4.3081865310668945
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,16383,6.419610977172852
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,16383,3.1649440129597983
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,16383,3.1737759908040366
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,16383,3.264373461405436
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,16383,3.1958239873250327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,16383,3.176346778869629
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,16383,3.1686347325642905
TRTLLM,1.2.0rc5,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,128,16383,3.167327880859375
