framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,1,0.026736001173655193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,1,0.025450666745503742
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,1,0.025957333544890087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,1,0.025226667523384094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,1,0.027317332724730175
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,1,0.025013332565625507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,1,0.025781333446502686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,1,0.02514133354028066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,1,0.025221332907676697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,3,0.02734400083621343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,3,0.02645866572856903
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,1,0.025983999172846477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,1,0.02516799916823705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,1,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,3,0.02532266577084859
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,3,0.025861332813898723
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,3,0.025600001215934753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,3,0.024986666937669117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,3,0.02585600068171819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,3,0.026378666361172993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,3,0.025792000194390614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,3,0.026133333643277485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,3,0.0258240004380544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,1,0.025098666548728943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,3,0.025045332809289295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,3,0.025013332565625507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,3,0.02492800106604894
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,7,0.026320000489552815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,1,0.024911999702453613
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,7,0.02718399961789449
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,7,0.02476266771554947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,7,0.026975999275843304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,7,0.030224000414212544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,7,0.025477332373460133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,7,0.025493333737055462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,7,0.02603200078010559
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,7,0.02548266698916753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,7,0.10014399886131287
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,7,0.024853333830833435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,7,0.025631998976071674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,7,0.025429333249727886
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,15,0.026170666019121807
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,15,0.025946666797002155
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,15,0.025472000241279602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,15,0.02516266703605652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,15,0.02510400116443634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,7,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,15,0.02518933266401291
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,15,0.025194667279720306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,15,0.027493332823117573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,15,0.025626666843891144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,15,0.025066666305065155
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,15,0.02586666742960612
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,15,0.02535466601451238
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,15,0.025349333882331848
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,31,0.025914666553338368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,15,0.02515200028816859
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,31,0.02754133443037669
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,31,0.02476799984773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,31,0.025397333006064098
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,31,0.030554667115211487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,31,0.025077333052953083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,31,0.026650667190551758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,31,0.02515733242034912
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,31,0.02491733431816101
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,31,0.025850666066010792
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,31,0.025279998779296875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,31,0.025029333929220837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,31,0.025013332565625507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,31,0.025013332565625507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,63,0.025397333006064098
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,63,0.024645333488782246
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,63,0.0240639994541804
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,63,0.02385066697994868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,63,0.02383466561635335
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,63,0.02521066615978877
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,63,0.023898666103680927
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,63,0.024197332561016083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,63,0.024293333292007446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,63,0.024682665864626568
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,63,0.024069334069887798
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,63,0.02386666586001714
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,63,0.023685333629449207
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,127,0.02807466685771942
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,63,0.023621333142121632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,127,0.02693866689999898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,127,0.025941332181294758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,127,0.02573866645495097
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,127,0.026330667237440746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,127,0.025957333544890087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,127,0.02573866645495097
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,127,0.027957332630952198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,127,0.026842666169007618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,127,0.02589333305756251
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,127,0.02632533262173335
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,127,0.025973332424958546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,127,0.025861332813898723
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,127,0.025701334079106648
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,255,0.02826666583617528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,255,0.02736533433198929
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,255,0.026341333985328674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,255,0.026021334032217663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,255,0.026320000489552815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,255,0.02603200078010559
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,255,0.026863999664783478
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,255,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,255,0.026101333399613697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,255,0.026917333404223125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,255,0.02606400102376938
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,255,0.026021334032217663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,255,0.026074667771657307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,255,0.026005332668622334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,511,0.028912000358104706
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,511,0.02712533374627431
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,511,0.02757866680622101
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,511,0.02644266684850057
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,511,0.027002667387326557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,511,0.02661866694688797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,511,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,511,0.029103999336560566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,511,0.027664000789324444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,511,0.02918400118748347
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,511,0.026447998980681103
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,511,0.026736001173655193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,511,0.02699200063943863
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,1023,0.028218666712443035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,1023,0.02773333340883255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,1023,0.030000001192092896
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,1023,0.02808533360560735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,1023,0.02888533224662145
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,1023,0.02769600103298823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,511,0.026426665484905243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,1023,0.027930667002995808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,1023,0.028197333216667175
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,1023,0.02775999903678894
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,1023,0.027701333165168762
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,1023,0.027749332288901012
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,1023,0.02790933350721995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,1023,0.03010133405526479
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,1023,0.028736000259717304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,2047,0.03307733436425527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,2047,0.030613332986831665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,2047,0.031504000226656594
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,2047,0.030320001145203907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,2047,0.030426666140556335
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,2047,0.030224000414212544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,2047,0.03164800008138021
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,2047,0.03299200038115183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,2047,0.030597334106763203
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,2047,0.03046933313210805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,2047,0.030368000268936157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,2047,0.030026666820049286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,2047,0.03044266750415166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,2047,0.03030933439731598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,4095,0.03842133283615112
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,4095,0.03583999971548716
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,4095,0.03475199888149897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,4095,0.034501334031422935
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,4095,0.034661332766215004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,4095,0.034485332667827606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,4095,0.03432533393303553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,4095,0.03475733349720637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,4095,0.03642666588226954
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,4095,0.03748266647259394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,4095,0.034661332766215004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,4095,0.03455466777086258
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,4095,0.03454400102297465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,8191,0.039221333960692085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,4095,0.034474665919939675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,8191,0.03726933399836222
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,8191,0.03677333394686381
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,8191,0.03766933331886927
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,8191,0.036576000352700554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,8191,0.03654933224121729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,8191,0.03664533297220866
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,8191,0.039274667700131737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,8191,0.037871999045213066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,8191,0.03729599962631861
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,8191,0.03678400069475174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,16383,0.048656001687049866
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,8191,0.036517334481080375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,8191,0.036464000741640724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,16383,0.045050665736198425
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,16383,0.04631466666857401
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,16383,0.04478933413823446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,8191,0.03659733384847641
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,16383,0.04451199869314829
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,16383,0.04445866743723551
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,16383,0.04437333345413208
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,16383,0.04641599953174591
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,16383,0.04489600161711375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,16383,0.04461333155632019
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,16383,0.04428266485532125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,16383,0.04853333532810211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,16383,0.04490133126576742
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,16383,0.04447466631730398
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,32767,0.0561653325955073
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,32767,0.052000001072883606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,32767,0.04913066824277242
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,32767,0.04855466882387797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,32767,0.04808000226815542
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,32767,0.056986664732297264
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,32767,0.05067733426888784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,32767,0.04808533191680908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,32767,0.048623998959859215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,32767,0.049226666490236916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,32767,0.04818666477998098
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,32767,0.05705599983533224
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,32767,0.04807466765244802
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,32767,0.048138668139775596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,65535,0.06769600013891856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,65535,0.06293866535027821
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,65535,0.06187200049559275
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,65535,0.06268799801667531
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,65535,0.061792001128196716
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,65535,0.06141866743564606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,65535,0.06478400031725566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,65535,0.0677706648906072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,65535,0.06464000046253204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,65535,0.06338133414586385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,65535,0.062080000837643944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,65535,0.06149866680304209
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,65535,0.06188266475995382
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,65535,0.06117866436640421
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,2,131071,0.0821919987599055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,1,131071,0.08619200189908345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,4,131071,0.0807360013326009
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,8,131071,0.08060266574223836
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,16,131071,0.07957866787910461
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,32,131071,0.07924266656239827
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1,1,64,131071,0.07830399771531422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,1,131071,0.08615466952323914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,4,131071,0.08082666496435802
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,2,131071,0.08258133133252461
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,8,131071,0.0795306662718455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,32,131071,0.07935999830563863
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,16,131071,0.07961600025494893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1,1,64,131071,0.07890133559703827
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,1,0.02757866680622101
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,1,0.02665599932273229
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,1,0.026373334228992462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,1,0.025983999172846477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,1,0.02609066665172577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,1,0.025962665677070618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,1,0.02661866694688797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,1,0.027642667293548584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,1,0.025941332181294758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,1,0.02625600000222524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,1,0.026026666164398193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,1,0.025722667574882507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,3,0.027514666318893433
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,1,0.02593066543340683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,3,0.026629333694775898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,3,0.025616000096003216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,3,0.025706666211287182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,1,0.025759999950726826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,3,0.026202666262785595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,3,0.025722667574882507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,3,0.027248000105222065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,3,0.025781333446502686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,3,0.02643733223279317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,3,0.026159999271233875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,3,0.025888000925381977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,3,0.025637333591779072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,3,0.0259253333012263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,7,0.02646933247645696
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,3,0.025807999074459076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,7,0.027242665489514668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,7,0.02607999990383784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,7,0.025610665480295818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,7,0.02548266698916753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,7,0.025642665723959606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,7,0.025727999707063038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,7,0.02735466758410136
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,7,0.026047999660174053
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,7,0.025573333104451496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,7,0.025631998976071674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,7,0.02809600035349528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,7,0.025573333104451496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,7,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,15,0.027104000250498455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,15,0.02624000112215678
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,15,0.025765334566434223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,15,0.025434667865435284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,15,0.025242666403452556
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,15,0.0252960001428922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,15,0.025333332518736523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,15,0.02625600000222524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,15,0.02696000039577484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,15,0.025610665480295818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,15,0.025466665625572205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,15,0.02515733242034912
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,15,0.0252960001428922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,31,0.026752000053723652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,31,0.026181332767009735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,15,0.025370667378107708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,31,0.025216000775496166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,31,0.025226667523384094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,31,0.02548266698916753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,31,0.024885334074497223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,31,0.02514133354028066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,31,0.026757332185904186
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,31,0.025962665677070618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,31,0.025557334224383037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,31,0.025258667767047882
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,31,0.025253333151340485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,63,0.025605333348115284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,31,0.02499733368555705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,63,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,63,0.034917332231998444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,31,0.025253333151340485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,63,0.024122667809327442
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,63,0.024069334069887798
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,63,0.02405333270629247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,63,0.023754666248957317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,63,0.05100800096988678
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,63,0.024634666740894318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,63,0.02441066751877467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,63,0.023936000963052113
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,63,0.0239680012067159
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,63,0.02402133246262868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,63,0.02385066697994868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,127,0.027429332335789997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,127,0.028586665789286297
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,127,0.027189334233601887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,127,0.026816000541051228
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,127,0.026250667870044708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,127,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,127,0.02863999952872594
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,127,0.026714667677879333
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,127,0.026213333010673523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,127,0.02681066592534383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,127,0.027445333699385326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,127,0.02605333427588145
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,127,0.026362667481104534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,127,0.026421333352724712
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,255,0.02887466549873352
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,255,0.02743999908367793
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,255,0.026911998788515728
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,255,0.026394667724768322
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,255,0.026394667724768322
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,255,0.026501332720120747
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,255,0.026719999810059864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,255,0.029093332588672638
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,255,0.027503999571005504
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,255,0.026842666169007618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,255,0.02699733277161916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,255,0.02643733223279317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,255,0.026474667092164356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,255,0.026378666361172993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,511,0.028277332584063213
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,511,0.028031999866167705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,511,0.027535999814669292
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,511,0.02754133443037669
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,511,0.029978667696317036
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,511,0.027295999228954315
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,511,0.02739199995994568
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,511,0.028384000062942505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,511,0.029882666965325672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,511,0.028064000109831493
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,511,0.027306665976842243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,1023,0.032229334115982056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,511,0.027744000156720478
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,511,0.02754666656255722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,511,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,1023,0.030661332110563915
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,1023,0.029802667597929638
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,1023,0.029680001238981884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,1023,0.029317334294319153
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,1023,0.032474666833877563
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,1023,0.029370665550231934
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,1023,0.029232000311215717
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,1023,0.029834667841593426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,1023,0.029477333029111225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,1023,0.030906667311986286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,1023,0.029669334491093952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,1023,0.029258665939172108
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,1023,0.029205332199732464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,2047,0.03643200049797694
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,2047,0.033215999603271484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,2047,0.03249600032965342
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,2047,0.033887999753157295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,2047,0.03242666771014532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,2047,0.03245333333810171
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,2047,0.03551999976237615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,2047,0.03430933256944021
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,2047,0.032586666444937386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,2047,0.03320533285538355
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,2047,0.032511999209721885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,2047,0.03257066756486893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,2047,0.032431999842325844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,4095,0.037263999382654824
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,2047,0.032255999743938446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,4095,0.03619733452796936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,4095,0.03490666548411051
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,4095,0.03505066782236099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,4095,0.03439466655254364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,4095,0.034661332766215004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,4095,0.03461866577466329
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,4095,0.0373333344856898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,4095,0.03514133393764496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,4095,0.03492266684770584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,4095,0.03452266752719879
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,4095,0.035717333356539406
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,4095,0.03460799902677536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,8191,0.042090664307276406
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,4095,0.03472533325354258
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,8191,0.04012266546487808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,8191,0.054005334774653115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,8191,0.038949333131313324
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,8191,0.038880000511805214
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,8191,0.03921066721280416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,8191,0.04008533308903376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,8191,0.04215466479460398
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,8191,0.03920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,8191,0.03885866701602936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,8191,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,8191,0.045594667394955955
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,8191,0.03895466774702072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,8191,0.039290666580200195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,16383,0.05332799752553304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,16383,0.048165331284205117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,16383,0.04574933151404063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,16383,0.04492799937725067
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,16383,0.04484266539414724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,16383,0.04433600107828776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,16383,0.05338666836420695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,16383,0.04669866462548574
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,16383,0.04762133459250132
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,16383,0.04480533301830292
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,16383,0.044341335693995156
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,16383,0.04455466568470001
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,16383,0.044666667779286705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,32767,0.06437333424886067
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,32767,0.0612960010766983
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,16383,0.04470400015513102
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,32767,0.05977599819501241
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,32767,0.05830933153629303
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,32767,0.057855998476346336
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,32767,0.058143998185793556
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,32767,0.05791999896367391
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,32767,0.05959466596444448
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,32767,0.06126399834950765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,32767,0.06431999802589417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,32767,0.058650667468706764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,32767,0.05829866727193197
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,32767,0.05821333328882853
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,65535,0.08197866876920064
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,32767,0.05751466751098633
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,65535,0.07859733204046886
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,65535,0.07689600189526875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,65535,0.07706133524576823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,65535,0.0759200006723404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,65535,0.07506133119265239
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,65535,0.07509866853555043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,65535,0.08249600231647491
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,65535,0.0790773332118988
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,65535,0.07715733349323273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,65535,0.07625066737333934
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,65535,0.07574933270613353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,65535,0.07612800101439159
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,65535,0.07522666454315186
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,1,131071,0.11708266536394756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,4,131071,0.11174933115641277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,2,131071,0.11316800117492676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,16,131071,0.10983467102050781
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,8,131071,0.11008532842000325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,32,131071,0.10995733737945557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,2,131071,0.11339733004570007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,1,131071,0.11649066209793091
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,2,1,64,131071,0.1097920040289561
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,8,131071,0.11039466659228007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,4,131071,0.11129599809646606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,16,131071,0.1092639962832133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,32,131071,0.10917866230010986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,2,1,64,131071,0.10889066259066264
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,1,0.028016000986099243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,1,0.02720000098148982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,1,0.02701866626739502
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,1,0.026346666117509205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,1,0.02651199946800868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,1,0.02638400097688039
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,1,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,1,0.027935999135176342
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,1,0.02718399961789449
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,1,0.02657066782315572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,1,0.026485333840052288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,1,0.0262773334980011
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,1,0.02629866699377696
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,1,0.026330667237440746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,3,0.027610667049884796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,3,0.027104000250498455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,3,0.026602665583292644
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,3,0.026149332523345947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,3,0.02611200014750163
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,3,0.02611200014750163
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,3,0.026181332767009735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,3,0.028064000109831493
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,3,0.026533332963784535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,3,0.03814399987459183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,3,0.02626666675011317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,3,0.025973332424958546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,3,0.02622933437426885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,3,0.026015999416510265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,7,0.027034667630990345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,7,0.028736000259717304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,7,0.025983999172846477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,7,0.02658133457104365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,7,0.02603200078010559
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,7,0.02770666778087616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,7,0.02589333305756251
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,7,0.027109332382678986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,7,0.026362667481104534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,7,0.026021334032217663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,7,0.02603200078010559
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,7,0.026101333399613697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,7,0.025775998830795288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,7,0.025941332181294758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,15,0.027093333502610523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,15,0.02679466704527537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,15,0.02604266752799352
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,15,0.0259253333012263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,15,0.025631998976071674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,15,0.025775998830795288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,15,0.02568000058333079
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,15,0.027445333699385326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,15,0.026517334083716076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,15,0.026309333741664886
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,15,0.025722667574882507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,15,0.025818665822347004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,15,0.02571733295917511
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,15,0.02573866645495097
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,31,0.02699200063943863
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,31,0.026378666361172993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,31,0.02566933383544286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,31,0.02531733363866806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,31,0.02531733363866806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,31,0.025701334079106648
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,31,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,31,0.026858667532602947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,31,0.026426665484905243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,31,0.025285333395004272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,31,0.025909334421157837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,31,0.025407999753952026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,31,0.025333332518736523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,31,0.02565866708755493
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,63,0.025248001019159954
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,63,0.024698667228221893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,63,0.0484746644894282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,63,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,63,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,63,0.024538666009902954
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,63,0.024271999796231587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,63,0.02510933329661687
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,63,0.026314665873845417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,63,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,63,0.02438933402299881
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,63,0.02651199946800868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,63,0.024341332415739696
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,63,0.024405332903067272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,127,0.029125332832336426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,127,0.027855999767780304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,127,0.027445333699385326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,127,0.027109332382678986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,127,0.02914133419593175
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,127,0.02658133457104365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,127,0.026874666412671406
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,127,0.02640533447265625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,127,0.027727998793125153
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,127,0.027056001126766205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,127,0.027034667630990345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,127,0.027104000250498455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,127,0.02657066782315572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,127,0.026858667532602947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,255,0.02784000088771184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,255,0.028357334434986115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,255,0.029557332396507263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,255,0.027530667682488758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,255,0.02731200059254964
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,255,0.027136000494162243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,255,0.026901334524154663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,255,0.029898665845394135
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,255,0.02770666778087616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,255,0.02834133307139079
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,255,0.02740799884001414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,255,0.027386667827765148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,255,0.027237333357334137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,255,0.02714666724205017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,511,0.03203733265399933
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,511,0.030458666384220123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,511,0.02958400050799052
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,511,0.029296000798543293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,511,0.0288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,511,0.028965334097544353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,511,0.028922667105992634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,511,0.03197866678237915
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,511,0.03044266750415166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,511,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,511,0.029114666084448498
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,511,0.028549333413441975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,511,0.029120000700155895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,511,0.029002666473388672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,1023,0.03489600121974945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,1023,0.033370666205883026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,1023,0.03187733391920725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,1023,0.03209066639343897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,1023,0.031632001201311745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,1023,0.031258667508761086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,1023,0.03164800008138021
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,1023,0.03579733272393545
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,1023,0.03213333338499069
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,1023,0.033215999603271484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,1023,0.031770666440327965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,1023,0.031445334355036415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,1023,0.03160533308982849
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,1023,0.031258667508761086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,2047,0.03677866607904434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,2047,0.0352906659245491
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,2047,0.03403733422358831
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,2047,0.033728001018365227
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,2047,0.033600000043710075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,2047,0.033770665526390076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,2047,0.03366933266321818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,2047,0.03659733384847641
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,2047,0.03514666606982549
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,2047,0.034058667719364166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,2047,0.03376533339420954
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,2047,0.033770665526390076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,2047,0.033520000676314034
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,4095,0.040607998768488564
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,2047,0.033701332906881966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,4095,0.04012266546487808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,4095,0.038047999143600464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,4095,0.03835200021664301
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,4095,0.037776000797748566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,4095,0.037978666524092354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,4095,0.0374293327331543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,4095,0.04853333532810211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,4095,0.03841066608826319
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,4095,0.03912533322970072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,4095,0.037871999045213066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,4095,0.03774933268626531
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,4095,0.03772799919048945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,8191,0.04569066564242045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,8191,0.04906133313973745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,4095,0.03804266701141993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,8191,0.043466667334238686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,8191,0.04381333291530609
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,8191,0.04292800029118856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,8191,0.042677332957585655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,8191,0.042709335684776306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,8191,0.0496373325586319
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,8191,0.045226668318112694
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,8191,0.043322667479515076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,8191,0.043866669138272606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,8191,0.042821332812309265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,8191,0.0448586642742157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,8191,0.04264533519744873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,16383,0.06274133423964183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,16383,0.05840000013510386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,16383,0.05724266668160757
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,16383,0.0568800022204717
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,16383,0.05587733288606008
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,16383,0.056176001826922096
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,16383,0.05614933371543884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,16383,0.06242666641871134
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,16383,0.05861866474151611
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,16383,0.057130664587020874
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,16383,0.056832000613212585
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,16383,0.056517332792282104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,16383,0.055919999877611794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,16383,0.05555733541647593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,32767,0.08038400113582611
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,32767,0.07499200105667114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,32767,0.07684266567230225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,32767,0.0745066652695338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,32767,0.07418666779994965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,32767,0.07321600119272868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,32767,0.07362133264541626
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,32767,0.081386665503184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,32767,0.07693866888682048
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,32767,0.07495466868082683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,32767,0.0738453318675359
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,32767,0.07437333464622498
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,32767,0.07316266496976216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,32767,0.07400000095367432
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,65535,0.11513599753379822
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,65535,0.11131733655929565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,65535,0.10940266648928325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,65535,0.10837866862614949
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,65535,0.10779733459154765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,65535,0.10754666725794475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,65535,0.1071626643339793
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,65535,0.11479999621709187
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,65535,0.11032533645629883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,65535,0.10962667067845662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,65535,0.10761066277821858
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,65535,0.10750400026639302
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,65535,0.10712533195813496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,65535,0.10632000366846721
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,1,131071,0.18316266934076944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,2,131071,0.17797333002090454
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,4,131071,0.17620799938837686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,8,131071,0.17548267046610513
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,16,131071,0.17450666427612305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,32,131071,0.17358932892481485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,4,1,64,131071,0.17354132731755575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,1,131071,0.18273067474365234
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,4,131071,0.17626667022705078
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,2,131071,0.17861332496007284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,8,131071,0.1751733422279358
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,16,131071,0.17504000663757324
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,32,131071,0.1747200091679891
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,1,0.028624000648657482
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,4,1,64,131071,0.17394665877024332
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,1,0.02770666778087616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,1,0.027456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,1,0.02720000098148982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,1,0.027050666511058807
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,1,0.02697066714366277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,1,0.028490667541821797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,1,0.027978666126728058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,1,0.02697066714366277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,1,0.04276266694068909
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,1,0.02700799951950709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,1,0.026874666412671406
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,1,0.026661333938439686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,1,0.026789332429567974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,3,0.0284853329261144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,3,0.027797333896160126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,3,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,3,0.03105599929889043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,3,0.026778665681680042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,3,0.0268053337931633
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,3,0.026565333207448322
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,3,0.02850666642189026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,3,0.02720533311367035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,3,0.026917333404223125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,3,0.02672533442576726
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,3,0.027621333797772724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,3,0.026730666557947796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,3,0.026741333305835724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,7,0.02826133370399475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,7,0.027477333943049114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,7,0.027093333502610523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,7,0.0321066677570343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,7,0.02640533447265625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,7,0.026506667335828144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,7,0.0284853329261144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,7,0.02749866743882497
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,7,0.027061333258946735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,7,0.026586666703224182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,7,0.026687999566396076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,7,0.026533332963784535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,7,0.026554666459560394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,7,0.026485333840052288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,15,0.028192001084486645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,15,0.027087998886903126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,15,0.02621866762638092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,15,0.026565333207448322
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,15,0.0264533335963885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,15,0.026005332668622334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,15,0.026202666262785595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,15,0.027952000498771667
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,15,0.027322667340437572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,15,0.02682666728893916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,15,0.026421333352724712
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,15,0.026320000489552815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,15,0.026288000245889027
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,15,0.026149332523345947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,31,0.02754666656255722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,31,0.026506667335828144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,31,0.02649066597223282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,31,0.026165333886941273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,31,0.026074667771657307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,31,0.025829332570234936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,31,0.0259253333012263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,31,0.02736533433198929
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,31,0.026591998835404713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,31,0.026378666361172993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,31,0.02609066665172577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,31,0.02604266752799352
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,31,0.02619733413060506
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,31,0.025850666066010792
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,63,0.026719999810059864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,63,0.02571733295917511
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,63,0.025263999899228413
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,63,0.02493866781393687
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,63,0.02479466547568639
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,63,0.024821333587169647
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,63,0.024800000091393787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,63,0.026533332963784535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,63,0.02589333305756251
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,63,0.024906667570273083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,63,0.025221332907676697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,63,0.024879999458789825
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,63,0.02500266581773758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,63,0.02492800106604894
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,127,0.030181333422660828
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,127,0.028832000990708668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,127,0.028143999477227528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,127,0.027866666515668232
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,127,0.027765333652496338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,127,0.02734400083621343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,127,0.030053332448005676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,127,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,127,0.028864001234372456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,127,0.028255999088287354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,127,0.02788266787926356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,127,0.027589333554108936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,127,0.0276053324341774
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,255,0.03209600100914637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,127,0.029701332251230877
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,255,0.029717333614826202
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,255,0.030378667016824085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,255,0.029418667157491047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,255,0.03281066566705704
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,255,0.02886933336655299
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,255,0.028917332490285236
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,255,0.032085334261258446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,255,0.03029866764942805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,255,0.02958933264017105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,255,0.028922667105992634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,255,0.029279999434947968
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,255,0.02886933336655299
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,255,0.02882133424282074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,511,0.03483733286460241
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,511,0.03290133426586787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,511,0.03173866619666418
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,511,0.031888000667095184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,511,0.031130666534105938
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,511,0.03147733211517334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,511,0.031194667021433514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,511,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,511,0.03289599965016047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,511,0.03200000027815501
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,511,0.031146667897701263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,511,0.03173333406448364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,511,0.030965333183606465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,511,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,1023,0.03708266715208689
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,1023,0.03379199902216593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,1023,0.034714666505654655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,1023,0.03363200028737386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,1023,0.03323733309904734
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,1023,0.033402666449546814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,1023,0.033173332611719765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,1023,0.03696000079313914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,1023,0.03494933247566223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,1023,0.03382399926582972
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,1023,0.033589333295822144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,1023,0.03333866596221924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,1023,0.03296533226966858
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,2047,0.040463998913764954
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,1023,0.03334933271010717
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,2047,0.03806400050719579
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,2047,0.03871466716130575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,2047,0.03749866783618927
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,2047,0.03746666759252548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,2047,0.05242133140563965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,2047,0.0373333344856898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,2047,0.04044266790151596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,2047,0.039221333960692085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,2047,0.03788800040880839
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,2047,0.03771200031042099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,2047,0.037418665985266365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,2047,0.037402667105197906
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,2047,0.037392000357309975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,4095,0.04951466619968414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,4095,0.044079999128977455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,4095,0.043194666504859924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,4095,0.042405332128206887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,4095,0.0421013335386912
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,4095,0.04195199906826019
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,4095,0.041749333341916404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,4095,0.05107733110586802
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,4095,0.04433066646258036
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,4095,0.043231998880704246
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,4095,0.042677332957585655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,4095,0.04201066493988037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,4095,0.04206933577855428
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,4095,0.04182399809360504
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,8191,0.062128002444903054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,8191,0.05875733494758606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,8191,0.056874667604764305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,8191,0.05644799768924713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,8191,0.055402666330337524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,8191,0.055530667304992676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,8191,0.05500266452630361
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,8191,0.05886933207511902
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,8191,0.05600533386071523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,8191,0.05707733333110809
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,8191,0.06182933350404104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,8191,0.0558240016301473
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,8191,0.055120001236597695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,8191,0.05592533449331919
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,16383,0.08119999865690868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,16383,0.0759093314409256
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,16383,0.07474666833877563
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,16383,0.0732426643371582
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,16383,0.07315200070540111
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,16383,0.0805866668621699
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,16383,0.07276799778143565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,16383,0.07634666562080383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,16383,0.07282133400440216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,16383,0.07411733269691467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,16383,0.07362666726112366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,16383,0.07274133463700612
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,16383,0.07314133147398631
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,16383,0.07249066730340321
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,32767,0.11049600442250569
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,32767,0.10884799559911092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,32767,0.11515200138092041
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,32767,0.10645866394042969
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,32767,0.1074133316675822
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,32767,0.10777067144711812
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,32767,0.10661333799362183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,32767,0.11453333497047424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,32767,0.10969600081443787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,32767,0.1077280044555664
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,32767,0.10700266559918721
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,32767,0.10891733566919963
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,32767,0.10662399729092915
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,32767,0.10710400342941284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,65535,0.1853920022646586
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,65535,0.1807146668434143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,65535,0.177130659421285
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,65535,0.17609065771102905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,65535,0.17562667528788248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,65535,0.17537067333857217
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,65535,0.17549333969751993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,65535,0.18434667587280273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,65535,0.18013866742451987
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,65535,0.17760533094406128
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,65535,0.17628266414006552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,65535,0.1754186749458313
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,65535,0.17522132396697998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,65535,0.17523733774820963
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,2,131071,0.31084267298380536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,1,131071,0.3162826697031657
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,4,131071,0.308570663134257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,8,131071,0.30714666843414307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,16,131071,0.30509332815806073
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,32,131071,0.30640532573064166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,8,1,64,131071,0.3058026631673177
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,2,131071,0.31060266494750977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,1,131071,0.3163040081659953
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,8,131071,0.3065226674079895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,4,131071,0.30869332949320477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,16,131071,0.3060693343480428
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,32,131071,0.3058026631673177
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,8,1,64,131071,0.30501333872477215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,1,0.029989334444204967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,1,0.028677334388097126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,1,0.028160000840822857
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,1,0.027877333263556164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,1,0.02900800108909607
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,1,0.02792000025510788
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,1,0.02794666588306427
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,1,0.030069333811601002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,1,0.029002666473388672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,1,0.028490667541821797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,1,0.028143999477227528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,1,0.027935999135176342
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,1,0.02792533238728841
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,3,0.029690665503342945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,1,0.027893332143624622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,3,0.02874133239189784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,3,0.02829333394765854
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,3,0.0281333327293396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,3,0.02788266787926356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,3,0.027765333652496338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,3,0.04446400205294291
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,3,0.029882666965325672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,3,0.028890666862328846
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,3,0.028362666567166645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,3,0.028037334481875103
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,3,0.027957332630952198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,3,0.02769600103298823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,7,0.029637334247430164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,3,0.02758399893840154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,7,0.028618666032950085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,7,0.028170667588710785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,7,0.02784000088771184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,7,0.027653334041436512
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,7,0.027786667148272198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,7,0.02758399893840154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,7,0.029616000751654308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,7,0.028565332293510437
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,7,0.028117333849271137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,7,0.027893332143624622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,7,0.027690666417280834
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,7,0.0276853342851003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,7,0.027535999814669292
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,15,0.028357334434986115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,15,0.029258665939172108
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,15,0.027445333699385326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,15,0.027722666660944622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,15,0.02738133321205775
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,15,0.027429332335789997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,15,0.02718399961789449
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,15,0.02938133229811986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,15,0.028362666567166645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,15,0.027514666318893433
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,15,0.02784000088771184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,15,0.027248000105222065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,15,0.029029332101345062
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,15,0.02741866558790207
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,31,0.028880000114440918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,31,0.027850667635599773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,31,0.026975999275843304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,31,0.027295999228954315
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,31,0.026917333404223125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,31,0.02693866689999898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,31,0.026928000152111053
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,31,0.028773332635561626
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,31,0.027930667002995808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,31,0.027466667195161183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,31,0.02700799951950709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,31,0.026885333160559338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,31,0.026746665438016255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,31,0.026975999275843304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,63,0.02792000025510788
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,63,0.026954665780067444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,63,0.02606400102376938
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,63,0.026362667481104534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,63,0.025653332471847534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,63,0.025973332424958546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,63,0.025781333446502686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,63,0.02792000025510788
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,63,0.026954665780067444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,63,0.02613866577545802
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,63,0.026320000489552815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,63,0.02589333305756251
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,63,0.02585600068171819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,127,0.032442666590213776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,127,0.030858665704727173
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,63,0.0258240004380544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,127,0.030224000414212544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,127,0.030095999439557392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,127,0.029861333469549816
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,127,0.02934933453798294
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,127,0.030741333961486816
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,127,0.029178666571776073
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,127,0.030063999195893604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,127,0.03257599969704946
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,127,0.029951999584833782
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,127,0.02961066613594691
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,127,0.0301706666747729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,127,0.029370665550231934
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,255,0.03433600068092346
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,255,0.03260799994071325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,255,0.032085334261258446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,255,0.03179733455181122
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,255,0.031632001201311745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,255,0.03162133445342382
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,255,0.031514666974544525
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,255,0.03430933256944021
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,255,0.031957333286603294
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,255,0.031871999303499855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,255,0.03249066571394602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,255,0.031717332700888314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,255,0.031583999594052635
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,511,0.037461332976818085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,255,0.031632001201311745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,511,0.05021866659323374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,511,0.03404266635576884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,511,0.03339733431736628
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,511,0.0335413341720899
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,511,0.033930666744709015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,511,0.03355200091997782
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,511,0.037274666130542755
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,511,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,511,0.03380800038576126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,511,0.033887999753157295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,511,0.03340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,511,0.0335359995563825
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,511,0.033359999457995095
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,1023,0.0415040006240209
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,1023,0.0391893337170283
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,1023,0.038202665746212006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,1023,0.03821333249409994
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,1023,0.038058665891488395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,1023,0.037818667789300285
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,1023,0.037818667789300285
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,1023,0.04278933505217234
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,1023,0.039450667798519135
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,1023,0.03822933385769526
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,1023,0.03825599948565165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,1023,0.03807466725508372
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,1023,0.03809066613515218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,1023,0.037647999823093414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,2047,0.05067199965318044
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,2047,0.04576533536116282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,2047,0.04347200194994608
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,2047,0.04320533573627472
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,2047,0.042863999803860985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,2047,0.043050666650136314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,2047,0.04284266630808512
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,2047,0.04609066744645437
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,2047,0.04483733574549357
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,2047,0.050794666012128196
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,2047,0.04330133398373922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,2047,0.042837331692377724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,2047,0.04284266630808512
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,2047,0.04312000175317129
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,4095,0.06260799864927928
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,4095,0.058677335580190025
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,4095,0.057029331723848976
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,4095,0.056133334835370384
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,4095,0.055573334296544395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,4095,0.05603733162085215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,4095,0.05552533268928528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,4095,0.06268266836802165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,4095,0.058261334896087646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,4095,0.05671999851862589
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,4095,0.05624000231424967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,4095,0.05490666627883911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,4095,0.05532266696294149
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,8191,0.08247466882069905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,4095,0.05625066657861074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,8191,0.07763733466466267
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,8191,0.07500266532103221
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,8191,0.0739573339621226
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,8191,0.07340799768765767
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,8191,0.07267733414967854
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,8191,0.07255466779073079
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,8191,0.08267199993133545
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,8191,0.07786133388678233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,8191,0.07426666716734569
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,8191,0.07650133470694225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,8191,0.07362133264541626
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,8191,0.07307733098665874
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,8191,0.07267199953397115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,16383,0.11528533697128296
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,16383,0.11020267009735107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,16383,0.10897066195805867
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,16383,0.10775466759999593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,16383,0.1062506635983785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,16383,0.10643733541170756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,16383,0.10674666364987691
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,16383,0.11515200138092041
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,16383,0.11095466216405232
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,16383,0.10814399520556132
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,16383,0.10725333293279012
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,16383,0.10758933424949646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,16383,0.10572800040245056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,32767,0.18638932704925537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,16383,0.10623466968536377
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,32767,0.1797599991162618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,32767,0.17733865976333618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,32767,0.17633599042892456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,32767,0.17570134003957114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,32767,0.1743626594543457
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,32767,0.174618661403656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,32767,0.1864373286565145
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,32767,0.18020800749460855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,32767,0.17524266242980957
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,32767,0.17680533727010092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,32767,0.17695999145507812
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,32767,0.17463467518488565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,32767,0.17358400424321493
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,65535,0.32080533107121784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,65535,0.3125813404719035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,65535,0.3087199926376343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,65535,0.3071253299713135
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,65535,0.3081653316815694
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,65535,0.313098669052124
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,65535,0.3190400004386902
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,65535,0.30580800771713257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,65535,0.3049760063489278
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,65535,0.3099306623140971
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,65535,0.30693866809209186
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,65535,0.30664000908533734
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,65535,0.3065440058708191
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,65535,0.3068213264147441
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,1,131071,0.5848000049591064
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,4,131071,0.5740693410237631
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,16,131071,0.571727991104126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,32,131071,0.5716373523076376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,2,131071,0.5773439804712931
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,16,1,64,131071,0.5713760058085123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,8,131071,0.5715786616007487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,1,131071,0.5838720003763834
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,2,131071,0.5777866840362549
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,1,0.03341866781314214
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,4,131071,0.5739253362019857
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,16,1,64,131071,0.5707733233769735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,32,131071,0.5707573493321737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,16,131071,0.5719360113143921
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,8,131071,0.5726240078608195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,1,0.03161599983771642
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,1,0.030784000953038532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,1,0.03046933313210805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,1,0.03047466774781545
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,1,0.03048533449570338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,1,0.030767999589443207
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,1,0.03013866643110911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,1,0.033471999069054924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,1,0.03160533308982849
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,1,0.03374933451414108
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,1,0.030213333666324615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,1,0.030074665943781536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,1,0.03011200080315272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,3,0.033370666205883026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,3,0.031445334355036415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,3,0.030394665896892548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,3,0.03013866643110911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,3,0.033200000723203026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,3,0.030960001051425934
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,3,0.03017599880695343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,3,0.030234667162100475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,3,0.030640001098314922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,3,0.030239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,3,0.03149333347876867
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,3,0.03029866764942805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,3,0.030245333909988403
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,7,0.03310933212439219
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,3,0.030373332401116688
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,7,0.031290667752424874
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,7,0.030565333863099415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,7,0.03014400104681651
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,7,0.0301706666747729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,7,0.030095999439557392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,7,0.030005333324273426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,7,0.033301333586374916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,7,0.03143466760714849
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,7,0.03049066662788391
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,7,0.029951999584833782
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,7,0.03013866643110911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,7,0.029946667452653248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,7,0.030063999195893604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,15,0.032799998919169106
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,15,0.030906667311986286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,15,0.030400000512599945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,15,0.02956266701221466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,15,0.029738667110602062
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,15,0.02961066613594691
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,15,0.029626667499542236
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,15,0.03271466741959254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,15,0.0308693324526151
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,15,0.030271999537944794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,15,0.030095999439557392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,15,0.029648000995318096
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,15,0.02958400050799052
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,15,0.029525332152843475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,31,0.03226666649182638
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,31,0.03050133337577184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,31,0.029717333614826202
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,31,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,31,0.02934933453798294
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,31,0.02917333443959554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,31,0.02903999884923299
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,31,0.03238933285077413
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,31,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,31,0.029733332494894665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,31,0.02939733366171519
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,31,0.02935466667016347
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,31,0.02934933453798294
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,31,0.029098667204380035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,63,0.03345066557327906
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,63,0.029887999097506206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,63,0.028736000259717304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,63,0.028160000840822857
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,63,0.028005334238211315
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,63,0.02804800122976303
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,63,0.02808533360560735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,63,0.029616000751654308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,63,0.03177600105603536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,63,0.02884799987077713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,63,0.028234665592511494
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,63,0.030005333324273426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,63,0.028042666614055634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,63,0.028181334336598713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,127,0.03402666747570038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,127,0.030666666726271313
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,127,0.03147733211517334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,127,0.030373332401116688
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,127,0.030106666187445324
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,127,0.030426666140556335
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,127,0.030074665943781536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,127,0.03356799980004629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,127,0.03194666653871536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,127,0.030671998858451843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,127,0.03065599997838338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,127,0.030410667260487873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,127,0.030213333666324615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,127,0.03030933439731598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,255,0.04125866790612539
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,255,0.03810133288304011
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,255,0.0369759996732076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,255,0.0364533339937528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,255,0.03700799991687139
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,255,0.036271999279658
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,255,0.036373332142829895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,255,0.037818667789300285
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,255,0.04134399940570196
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,255,0.03719999889532725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,255,0.03692266593376795
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,255,0.03664533297220866
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,255,0.036506667733192444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,255,0.0363520011305809
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,511,0.04364799956480662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,511,0.03987200061480204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,511,0.038986665507157646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,511,0.038746667404969536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,511,0.03850133220354716
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,511,0.03843733419974645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,511,0.03819733361403147
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,511,0.04353600243727366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,511,0.039434666434923805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,511,0.038693333665529885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,511,0.0386559988061587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,511,0.03862933317820231
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,511,0.038202665746212006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,511,0.038560000558694206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,1023,0.05310933291912079
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,1023,0.046522667010625206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,1023,0.04487466812133789
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,1023,0.044480000933011375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,1023,0.04465599854787191
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,1023,0.04483733574549357
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,1023,0.04469866553942362
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,1023,0.04515733321507772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,1023,0.044624000787734985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,1023,0.052970667680104576
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,1023,0.046480000019073486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,1023,0.04497600098450979
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,1023,0.04455466568470001
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,1023,0.04502933224042257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,2047,0.06021333237489065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,2047,0.05675200124581655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,2047,0.05670933425426483
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,2047,0.0580266664425532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,2047,0.06555200119813283
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,2047,0.05716800192991892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,2047,0.05665599803129832
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,2047,0.06567466755708058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,2047,0.060362666845321655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,2047,0.058287998040517174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,2047,0.05650666852792104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,2047,0.056703999638557434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,4095,0.08408533533414204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,4095,0.0787360022465388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,4095,0.0757173349459966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,2047,0.057061334451039634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,2047,0.056277334690093994
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,4095,0.0747626672188441
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,4095,0.07403199871381123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,4095,0.0737013320128123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,4095,0.08403733372688293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,4095,0.07411199808120728
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,4095,0.07575466732184093
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,4095,0.0788266658782959
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,4095,0.07422933479150136
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,4095,0.0738560010989507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,4095,0.07396266857783
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,8191,0.11831999818483989
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,4095,0.07339199880758922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,8191,0.11252267162005107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,8191,0.1090666651725769
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,8191,0.1095360020796458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,8191,0.10806399583816528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,8191,0.1074666678905487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,8191,0.11833066741625468
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,8191,0.10991467038790385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,8191,0.1071573297182719
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,8191,0.10854400197664897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,8191,0.10789333780606587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,8191,0.10813333590825398
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,8191,0.10781332850456238
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,8191,0.11186133821805318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,16383,0.18522665898005167
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,16383,0.1805866758028666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,16383,0.17729600270589194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,16383,0.17631999651590982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,16383,0.1748853325843811
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,16383,0.17501866817474365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,16383,0.18628267447153726
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,16383,0.17935466766357422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,16383,0.17511999607086182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,16383,0.17813332875569662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,16383,0.1758026679356893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,16383,0.1755946675936381
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,16383,0.175327996412913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,16383,0.1750826636950175
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,32767,0.3164213299751282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,32767,0.32547199726104736
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,32767,0.31063467264175415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,32767,0.3083039919535319
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,32767,0.3078239957491557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,32767,0.3259999950726827
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,32767,0.30769066015879315
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,32767,0.3085599939028422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,32767,0.31562666098276776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,32767,0.3088266650835673
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,32767,0.3126613299051921
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,32767,0.3089759945869446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,32767,0.3076373338699341
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,32767,0.3065226674079895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,65535,0.5814400116602579
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,65535,0.593013326327006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,65535,0.5760266780853271
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,65535,0.5715893507003784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,65535,0.5739626487096151
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,65535,0.5713813304901123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,65535,0.5708160003026327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,65535,0.5908480087916056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,65535,0.5760746796925863
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,65535,0.5810399850209554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,65535,0.574282685915629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,65535,0.5706773201624552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,65535,0.5732213258743286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,65535,0.5720746517181396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,1,131071,1.1189653078715007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,2,131071,1.1061173280080159
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,4,131071,1.1035786469777424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,8,131071,1.09934401512146
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,16,131071,1.0990986824035645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,32,131071,1.098405361175537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,32,1,64,131071,1.0966133276621501
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,1,131071,1.1190773646036785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,2,131071,1.1082879702250164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,4,131071,1.1033439636230469
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,1,0.03641066700220108
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,1,0.03899733225504557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,8,131071,1.0997973283131917
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,16,131071,1.0989279747009277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,32,131071,1.0986560185750325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,1,0.03544000039498011
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,32,1,64,131071,1.0951519807179768
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,1,0.03482666611671448
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,1,0.034458667039871216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,1,0.034490667283535004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,1,0.033930666744709015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,1,0.03881600002447764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,1,0.03605866680542628
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,1,0.035445332527160645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,1,0.03487999985615412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,1,0.03435199956099192
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,1,0.03444266567627589
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,1,0.03454933315515518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,3,0.03869866579771042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,3,0.03620799879233042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,3,0.03489600121974945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,3,0.03468266626199087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,3,0.03442133218050003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,3,0.03431999931732813
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,3,0.03435733417669932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,3,0.038319999972979225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,3,0.03606399893760681
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,3,0.03503466645876566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,3,0.03430933256944021
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,3,0.034634667138258614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,3,0.03440000116825104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,3,0.03402666747570038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,7,0.036015999813874565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,7,0.034976000587145485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,7,0.03866666555404663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,7,0.0344106654326121
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,7,0.034490667283535004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,7,0.03440533330043157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,7,0.034160000582536064
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,7,0.03862400104602178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,7,0.0359946663180987
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,7,0.03495999922355016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,7,0.03457066665093104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,7,0.0342399999499321
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,7,0.044154668847719826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,7,0.034261333445707955
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,15,0.03830400109291077
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,15,0.035487999518712364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,15,0.03396799912055334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,15,0.03399466723203659
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,15,0.03385599950949351
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,15,0.03457066665093104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,15,0.035375999907652535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,15,0.03346133232116699
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,15,0.03817066550254822
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,15,0.033573334415753685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,15,0.034416000048319496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,15,0.03398933261632919
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,15,0.03385599950949351
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,15,0.03369600077470144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,31,0.03793599953254064
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,31,0.03501333296298981
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,31,0.034202667574087776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,31,0.03346133232116699
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,31,0.03329066683848699
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,31,0.03399466723203659
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,31,0.03345600018898646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,31,0.033173332611719765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,31,0.03765333443880081
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,31,0.0351200004418691
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,31,0.033600000043710075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,31,0.03329066683848699
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,31,0.033045334120591484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,31,0.033226666351159416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,63,0.0373333344856898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,63,0.03444266567627589
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,63,0.0325546662012736
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,63,0.03294399877389272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,63,0.03230399886767069
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,63,0.032255999743938446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,63,0.03442666679620743
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,63,0.03726933399836222
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,63,0.032074667513370514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,63,0.03326933334271113
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,63,0.046426668763160706
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,63,0.03215999901294708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,63,0.0322773332397143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,63,0.03207999964555105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,127,0.03909866760174433
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,127,0.03615466753641764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,127,0.03496533383925756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,127,0.03507733345031738
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,127,0.034586665530999504
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,127,0.03444266567627589
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,127,0.034517332911491394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,127,0.03932266682386398
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,127,0.03514666606982549
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,127,0.03643200049797694
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,127,0.034373333056767784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,127,0.03442666679620743
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,127,0.03475199888149897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,127,0.03452266752719879
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,255,0.04292800029118856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,255,0.04002666721741358
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,255,0.038634667793909706
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,255,0.038202665746212006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,255,0.037962667644023895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,255,0.037978666524092354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,255,0.03810133288304011
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,255,0.04276266694068909
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,255,0.04035733391841253
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,255,0.038245332737763725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,255,0.03849600007136663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,255,0.03812800099452337
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,255,0.038032000263532005
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,255,0.03793599953254064
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,511,0.053898667295773826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,511,0.04906133313973745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,511,0.06106133262316386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,511,0.04821866750717163
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,511,0.04817600051561991
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,511,0.047925333182017006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,511,0.047930667797724404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,511,0.06098133325576782
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,511,0.0537013312180837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,511,0.049098665515581764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,511,0.04822933177153269
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,511,0.04781333108743032
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,511,0.04808533191680908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,511,0.04795733094215393
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,1023,0.06644266843795776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,1023,0.07454399764537811
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,1023,0.06192533175150553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,1023,0.06075733403364817
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,1023,0.06163733204205831
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,1023,0.06066133578618368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,1023,0.06039999922116598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,1023,0.07542933523654938
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,1023,0.06639466683069865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,1023,0.06192000210285187
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,1023,0.06067200005054474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,1023,0.0609386662642161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,1023,0.061253334085146584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,1023,0.06051200131575266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,2047,0.08447999755541484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,2047,0.09173867106437683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,2047,0.08027199904123943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,2047,0.07875200112660725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,2047,0.07774933179219563
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,2047,0.07885866860548656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,2047,0.07821333408355713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,2047,0.09207466244697571
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,2047,0.08409600456555684
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,2047,0.08003200093905131
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,2047,0.07880533238252004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,2047,0.07826666533946991
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,2047,0.07863466441631317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,2047,0.07804266611735027
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,4095,0.1270026663939158
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,4095,0.11870400110880534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,4095,0.11498666803042094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,4095,0.113237331310908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,4095,0.11300266782442729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,4095,0.11291733384132385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,4095,0.11272000273068745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,4095,0.12643733620643616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,4095,0.11859732866287231
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,4095,0.11461866895357768
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,4095,0.11331199606259663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,4095,0.11251733700434367
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,4095,0.11245866616566975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,4095,0.11286933223406474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,8191,0.19319466749827066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,8191,0.18042133251825967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,8191,0.18486400445302328
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,8191,0.17960532506306967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,8191,0.17893866697947183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,8191,0.17940799395243326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,8191,0.17829867204030356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,8191,0.19420266151428223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,8191,0.18519999583562216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,8191,0.1807466745376587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,8191,0.17908267180124918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,8191,0.18006932735443115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,8191,0.17837866147359213
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,8191,0.17893334229787192
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,16383,0.340554674466451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,16383,0.3174346685409546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,16383,0.324890673160553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,16383,0.31514134009679157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,16383,0.3109546701113383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,16383,0.3123679955800374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,16383,0.3107466697692871
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,16383,0.33908267815907794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,16383,0.3266879916191101
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,16383,0.3175040086110433
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,16383,0.313914676507314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,16383,0.3131093382835388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,16383,0.31055466334025067
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,16383,0.31058667103449505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,32767,0.6053920189539591
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,32767,0.5925120115280151
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,32767,0.5823839902877808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,32767,0.5774079958597819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,32767,0.5763786633809408
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,32767,0.5762346585591634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,32767,0.5747413237889608
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,32767,0.6058346827824911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,32767,0.5888479948043823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,32767,0.5814933379491171
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,32767,0.5784746805826823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,32767,0.5752106507619222
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,32767,0.5764106512069702
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,32767,0.5745439926783243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,65535,1.1108160018920898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,65535,1.105727990468343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,65535,1.1207040150960286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,65535,1.1389493147532146
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,65535,1.1034560203552246
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,65535,1.103226661682129
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,65535,1.1025280157725017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,65535,1.1354880332946777
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,65535,1.119381348292033
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,65535,1.1093653043111165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,65535,1.107594648996989
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,65535,1.1044267018636067
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,65535,1.1026559670766194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,65535,1.101855993270874
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,1,131071,2.1869813601175943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,2,131071,2.169541358947754
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,4,131071,2.1626292864481607
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,8,131071,2.1549812952677407
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,16,131071,2.1527679761250815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,32,131071,2.1522720654805503
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,64,1,64,131071,2.1514506340026855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,1,131071,2.187978744506836
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,1,0.048613334695498146
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,2,131071,2.1720426877339682
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,1,0.0450133333603541
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,4,131071,2.1601386070251465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,1,0.04307733476161957
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,1,0.042208001017570496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,32,131071,2.149679978688558
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,16,131071,2.152986685434977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,8,131071,2.1581494013468423
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,64,1,64,131071,2.1495839754740396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,1,0.04172799984614054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,1,0.04140799989302953
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,1,0.04159466673930486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,1,0.048709332942962646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,1,0.044768000642458596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,1,0.043061330914497375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,1,0.04196799794832865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,1,0.041690667470296226
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,1,0.04161066561937332
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,1,0.04145599901676178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,3,0.044826666514078774
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,3,0.04288533329963684
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,3,0.0488373339176178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,3,0.041984001795450844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,3,0.04154666761557261
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,3,0.041434665520985924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,3,0.041450666884581246
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,3,0.04855999847253164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,3,0.044853334625562034
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,3,0.04271466533342997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,3,0.042064001162846885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,3,0.04167999823888143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,3,0.041333332657814026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,3,0.04154666761557261
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,7,0.04854399959246317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,7,0.04134399940570196
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,7,0.04261866708596548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,7,0.041365332901477814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,7,0.044405331214269005
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,7,0.04168533285458883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,7,0.041152000427246094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,7,0.048485333720842995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,7,0.04454400142033895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,7,0.042677332957585655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,7,0.04181866844495138
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,7,0.04141333450873693
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,7,0.041322665909926094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,15,0.04794133206208547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,7,0.04106666644414266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,15,0.04420266548792521
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,15,0.041450666884581246
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,15,0.04228266576925913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,15,0.04089066634575526
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,15,0.04093866546948751
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,15,0.0408746674656868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,15,0.048021331429481506
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,15,0.044165333112080894
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,15,0.042362665136655174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,15,0.04127999891837438
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,15,0.04083733260631561
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,15,0.04093866546948751
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,15,0.04109866668780645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,31,0.04762133459250132
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,31,0.04363733530044556
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,31,0.04171200096607208
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,31,0.04104000081618627
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,31,0.04045333216587702
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,31,0.040421334405740104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,31,0.040474665661652885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,31,0.04780800143877665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,31,0.04368533194065094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,31,0.04182399809360504
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,31,0.040821333726247154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,31,0.04053333401679993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,31,0.040394666294256844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,31,0.040522667268911995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,63,0.0431573341290156
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,63,0.047413334250450134
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,63,0.04101333270470301
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,63,0.040063999593257904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,63,0.03942399968703588
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,63,0.03957866628964742
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,63,0.039434666434923805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,63,0.047322665651639305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,63,0.043194666504859924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,63,0.041002665956815086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,63,0.03969600051641464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,63,0.03987200061480204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,63,0.039279999832312264
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,63,0.03942399968703588
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,127,0.04935466746489207
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,127,0.045007998744646706
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,127,0.04309333364168803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,127,0.042090664307276406
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,127,0.04164266586303711
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,127,0.04179200033346812
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,127,0.04162133236726125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,127,0.045109331607818604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,127,0.04921066761016846
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,127,0.043061330914497375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,127,0.042037333051363625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,127,0.041738669077555336
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,127,0.04165333261092504
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,127,0.04164266586303711
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,255,0.05819733440876007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,255,0.05056533217430115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,255,0.04692799846331278
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,255,0.04577599962552389
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,255,0.045381332437197365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,255,0.04515199859937032
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,255,0.04545066754023234
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,255,0.0580213318268458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,255,0.05097599824269613
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,255,0.047450666626294456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,255,0.045642669002215065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,255,0.04533866544564565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,255,0.04523199796676636
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,255,0.04623466730117798
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,511,0.06833066542943318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,511,0.0588266650835673
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,511,0.0636053333679835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,511,0.05972266693909963
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,511,0.05935466786225637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,511,0.060720001657803856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,511,0.058746665716171265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,511,0.06780800223350525
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,511,0.06332799792289734
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,511,0.0609440008799235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,511,0.0591893345117569
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,511,0.05892266829808553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,511,0.05938666562239329
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,511,0.058864002426465355
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,1023,0.08614400029182434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,1023,0.08069866895675659
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,1023,0.07754666606585185
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,1023,0.07640533149242401
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,1023,0.07727466523647308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,1023,0.07657066484292348
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,1023,0.07648000121116638
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,1023,0.08057066798210144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,1023,0.0769706666469574
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,1023,0.07798933486143748
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,1023,0.07693333427111308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,1023,0.0865066647529602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,1023,0.0766186664501826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,1023,0.07682133217652638
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,2047,0.12135466933250427
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,2047,0.11518399914105733
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,2047,0.11077866951624553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,2047,0.11075199643770854
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,2047,0.11135466893513997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,2047,0.11097066601117452
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,2047,0.110944002866745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,2047,0.12106133500734965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,2047,0.11585066715876262
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,2047,0.11110933621724446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,2047,0.11079999804496765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,2047,0.11227200428644817
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,2047,0.11028800408045451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,4095,0.18870933850606283
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,2047,0.11057600378990173
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,4095,0.18062933286031088
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,4095,0.1835199991861979
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,4095,0.17935466766357422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,4095,0.1788640022277832
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,4095,0.17838400602340698
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,4095,0.17867734034856161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,4095,0.18822934230168661
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,4095,0.1800373395284017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,4095,0.17922665675481161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,4095,0.1829493244489034
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,4095,0.17851199706395468
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,4095,0.1788533329963684
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,4095,0.178330659866333
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,8191,0.32105066378911334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,8191,0.31462399164835614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,8191,0.31253333886464435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,8191,0.31116267045338947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,8191,0.3105173309644063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,8191,0.30979732672373456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,8191,0.3095146616299947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,8191,0.31988799571990967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,8191,0.3155999978383382
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,8191,0.3124000032742818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,8191,0.31095999479293823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,8191,0.309717337290446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,8191,0.31031999985376996
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,8191,0.3105226755142212
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,16383,0.6332693497339884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,16383,0.6082506577173868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,16383,0.5945866505304972
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,16383,0.5847040017445883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,16383,0.582149346669515
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,16383,0.5814986626307169
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,16383,0.5833280086517334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,16383,0.6314773162206014
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,16383,0.5938239892323812
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,16383,0.6070506572723389
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,16383,0.5857119957605997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,16383,0.5825866858164469
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,16383,0.5796960194905599
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,16383,0.5803680022557577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,32767,1.165013313293457
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,32767,1.137114683787028
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,32767,1.1206560134887695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,32767,1.1113813718159993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,32767,1.1048213640848796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,32767,1.102666695912679
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,32767,1.1059679985046387
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,32767,1.1646186510721843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,32767,1.1371893088022869
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,32767,1.12226136525472
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,32767,1.111418644587199
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,32767,1.105013370513916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,32767,1.1006080309549968
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,32767,1.1052853266398113
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,65535,2.2221546173095703
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,65535,2.191706657409668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,65535,2.1745227177937827
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,65535,2.163109302520752
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,65535,2.1558027267456055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,65535,2.1591572761535645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,65535,2.1564319928487143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,65535,2.2206880251566568
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,65535,2.193317254384359
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,65535,2.175722599029541
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,65535,2.1634720166524253
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,65535,2.1589760780334473
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,65535,2.158031940460205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,65535,2.1552747090657554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,1,131071,4.317781448364258
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,32,131071,4.251248041788737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,2,131071,4.290149370829265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,16,131071,4.25434144337972
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,128,1,64,131071,4.252223968505859
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,8,131071,4.256405194600423
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,4,131071,4.271797180175781
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,1,131071,4.317269325256348
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,1,0.06853866577148438
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,1,0.0613013356924057
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,1,0.057775999108950295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,1,0.056186666091283165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,1,0.05486933390299479
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,1,0.055386667450269066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,1,0.054655998945236206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,1,0.0686773310105006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,1,0.06117333471775055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,1,0.05779733260472616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,2,131071,4.288415908813477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,1,0.056176001826922096
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,1,0.05540800094604492
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,32,131071,4.251989364624023
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,4,131071,4.273152033487956
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,8,131071,4.259519894917806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,1,0.05489600201447805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,16,131071,4.25820795694987
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,1,0.05486399928728739
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,3,0.06851199766000111
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,128,1,64,131071,4.252005259195964
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,3,0.0609440008799235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,3,0.05750399827957153
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,3,0.05622933308283488
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,3,0.055205335219701133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,3,0.05484800040721893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,3,0.054527997970581055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,3,0.06113066772619883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,3,0.06863999863465627
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,3,0.05740800003210703
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,3,0.056032001972198486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,3,0.05529066423575083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,3,0.05474133292833964
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,7,0.06072533130645752
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,7,0.06816533207893372
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,3,0.054655998945236206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,7,0.05715733269850413
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,7,0.055493334929148354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,7,0.055045331517855324
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,7,0.054602667689323425
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,7,0.05420800050099691
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,7,0.06868266562620799
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,7,0.05561066667238871
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,7,0.05462400118509928
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,7,0.05459199845790863
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,7,0.05718400080998739
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,7,0.06066666543483734
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,15,0.06746666630109151
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,7,0.05434666574001312
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,15,0.05973333120346069
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,15,0.056176001826922096
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,15,0.05420266588528951
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,15,0.05461333195368449
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,15,0.05331199864546458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,15,0.0674720009167989
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,15,0.05359466870625814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,15,0.05959466596444448
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,15,0.05649066468079885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,15,0.054010664423306785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,15,0.05470933516820272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,15,0.05331199864546458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,15,0.05342933535575867
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,31,0.06764799853165944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,31,0.05540800094604492
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,31,0.05870933334032694
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,31,0.05364799996217092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,31,0.05305600166320801
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,31,0.052517334620157875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,31,0.05240533252557119
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,31,0.06861333549022675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,31,0.05880533158779144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,31,0.05526933570702871
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,31,0.05288533369700114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,31,0.05231999854246775
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,31,0.05367999772230784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,63,0.07234666744867961
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,63,0.05806399881839752
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,31,0.05268266797065735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,63,0.05342933535575867
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,63,0.05187733471393585
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,63,0.05068266888459524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,63,0.05045866469542185
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,63,0.05062933266162872
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,63,0.057904000083605446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,63,0.0728959987560908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,63,0.05161066850026449
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,63,0.05046933392683665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,63,0.05091199775536855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,63,0.05357333521048228
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,63,0.050437331199645996
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,127,0.0672106643517812
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,127,0.05963733295599619
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,127,0.056421334544817604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,127,0.05490666627883911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,127,0.05491200089454651
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,127,0.0780320018529892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,127,0.07780799766381581
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,127,0.05478399991989136
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,127,0.05899199843406677
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,127,0.056554665168126426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,127,0.06727999945481618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,127,0.05516799787680308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,127,0.05487466851870219
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,127,0.0547626664241155
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,255,0.08674666285514832
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,255,0.07225599884986877
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,255,0.06785066425800323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,255,0.069733331600825
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,255,0.0680320014556249
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,255,0.06885333359241486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,255,0.07698666552702586
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,255,0.07672533392906189
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,255,0.08693333466847737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,255,0.07177599767843883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,255,0.069541335105896
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,255,0.06835199892520905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,255,0.06786133348941803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,255,0.06845866640408833
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,511,0.09571733077367146
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,511,0.10629866520563762
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,511,0.08772266904513042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,511,0.08639466762542725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,511,0.08603733777999878
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,511,0.08554133772850037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,511,0.0906826655069987
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,511,0.10591999689737956
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,511,0.09506666660308838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,511,0.0883626639842987
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,511,0.08595200379689534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,511,0.08659199873606364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,511,0.08994666735331218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,511,0.08576533198356628
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,1023,0.12501866618792215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,1023,0.1295146644115448
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,1023,0.1221386690934499
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,1023,0.12091733018557231
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,1023,0.14134400089581808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,1023,0.11985066533088684
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,1023,0.12447999914487202
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,1023,0.12936000029246011
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,1023,0.14095466335614523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,1023,0.11922666430473328
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,1023,0.12230400244394939
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,1023,0.12035733461380005
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,1023,0.11968533198038737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,1023,0.12046933174133301
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,2047,0.19751467307408652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,2047,0.20737600326538086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,2047,0.19164266188939413
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,2047,0.187717338403066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,2047,0.18818666537602743
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,2047,0.18720000982284546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,2047,0.18892266352971396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,2047,0.1914880077044169
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,2047,0.18944533665974936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,2047,0.19833066066106161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,2047,0.20759467283884683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,2047,0.18773333231608072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,2047,0.1867413322130839
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,2047,0.18683199087778726
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,4095,0.33905065059661865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,4095,0.3236266573270162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,4095,0.3298666675885518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,4095,0.320906658967336
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,4095,0.32096532980600995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,4095,0.32011733452479046
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,4095,0.3203786611557007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,4095,0.3395040035247803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,4095,0.33053867022196454
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,4095,0.32320000727971393
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,4095,0.3203893303871155
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,4095,0.3227786620457967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,4095,0.32121066252390545
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,4095,0.31983999411265057
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,8191,0.6222560008366903
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,8191,0.6480000019073486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,8191,0.605733315149943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,8191,0.590831995010376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,8191,0.5951946576436361
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,8191,0.5890719890594482
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,8191,0.5897119839986166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,8191,0.6492586533228556
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,8191,0.6215893427530924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,8191,0.5952853361765543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,8191,0.5910293261210123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,8191,0.5878666639328003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,8191,0.6041813294092814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,8191,0.588650663693746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,16383,1.1859839757283528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,16383,1.1543947060902913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,16383,1.1180853048960369
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,16383,1.1161333719889324
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,16383,1.1355679829915364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,16383,1.1236693064371746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,16383,1.1133333047231038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,16383,1.1873066425323486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,16383,1.1535413265228271
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,16383,1.137056032816569
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,16383,1.1225120226542156
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,16383,1.119050661722819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,16383,1.113317330678304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,16383,1.1175893147786458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,32767,2.250725269317627
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,32767,2.2090187072753906
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,32767,2.1764426231384277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,32767,2.188917318979899
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,32767,2.1704160372416177
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,32767,2.1633493105570474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,32767,2.16758394241333
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,32767,2.247152010599772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,32767,2.2108426094055176
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,32767,2.188117345174154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,32767,2.177834669748942
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,32767,2.1674133936564126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,32767,2.1634559631347656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,32767,2.161738713582357
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,1,65535,4.354389190673828
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,2,65535,4.313130696614583
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,4,65535,4.28550402323405
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,16,65535,4.263360023498535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,32,65535,4.261247952779134
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,256,1,64,65535,4.260224024454753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,8,65535,4.275477409362793
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,1,65535,4.35211722056071
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,1,0.0944640040397644
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,1,0.1120693286259969
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,1,0.08774399757385254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,1,0.08385599652926128
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,1,0.0823413332303365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,1,0.08158933122952779
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,1,0.08127466837565105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,1,0.09425066908200581
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,1,0.11201066772143047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,1,0.08758399883906047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,1,0.08390933275222778
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,2,65535,4.311738650004069
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,1,0.08238933483759563
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,1,0.08145600060621898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,4,65535,4.287568092346191
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,1,0.08105066418647766
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,3,0.11109333237012227
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,3,0.09486933549245198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,16,65535,4.2681013743082685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,3,0.08404800295829773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,32,65535,4.262703895568848
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,3,0.08684266606966655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,3,0.08137600123882294
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,3,0.08201600114504497
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,256,1,64,65535,4.260272026062012
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,3,0.08107733229796092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,3,0.11123733719189961
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,3,0.0939626693725586
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,8,65535,4.274213473002116
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,3,0.08366933465003967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,3,0.08734400073687236
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,3,0.0819946676492691
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,3,0.08141333361466725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,3,0.08111466467380524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,7,0.08649067083994548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,7,0.09337066610654195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,7,0.11077866951624553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,7,0.08094933132330577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,7,0.08275733391443889
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,7,0.08063999811808269
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,7,0.0803466687599818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,7,0.11054933071136475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,7,0.08276266853014629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,7,0.08111999928951263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,7,0.086325337489446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,7,0.0937653382619222
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,7,0.08073066671689351
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,7,0.08011733492215474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,15,0.07971733311812083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,15,0.08425600330034892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,15,0.08072533210118611
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,15,0.07884266475836436
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,15,0.07856533428033192
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,15,0.09307199716567993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,15,0.11065600315729777
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,15,0.11037866274515788
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,15,0.08078399797280629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,15,0.09299199779828389
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,15,0.07957333326339722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,15,0.07891199986139934
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,15,0.08431466420491536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,31,0.09187199672063191
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,15,0.07852800190448761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,31,0.11072533329327901
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,31,0.07896000146865845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,31,0.08239466448624928
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,31,0.076773335536321
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,31,0.07750399907430013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,31,0.07638399799664815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,31,0.1109226644039154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,31,0.09212266405423482
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,31,0.07911466558774312
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,31,0.0772213339805603
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,31,0.07685866455237071
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,31,0.07668266693751018
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,31,0.08222933113574982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,63,0.11379733681678772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,63,0.09516800443331401
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,63,0.07867200175921123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,63,0.08513599634170532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,63,0.07293333113193512
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,63,0.0746666689713796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,63,0.07289066910743713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,63,0.11427733302116394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,63,0.09492799639701843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,63,0.08589866757392883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,63,0.07875733574231465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,63,0.07222400108973186
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,63,0.07505066692829132
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,63,0.07298666735490163
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,127,0.12293866276741028
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,127,0.08973866701126099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,127,0.09411199887593587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,127,0.08787733316421509
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,127,0.10386133193969727
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,127,0.08635733524958293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,127,0.08672533432642619
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,127,0.12319466471672058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,127,0.10377599795659383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,127,0.08798399567604065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,127,0.0897813340028127
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,127,0.09474666913350423
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,127,0.08628799517949422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,127,0.08749333024024963
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,255,0.14070399602254233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,255,0.11162133018175761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,255,0.12157866358757019
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,255,0.10804800192515056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,255,0.10433600346247356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,255,0.10552000006039937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,255,0.1030399998029073
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,255,0.10800000031789143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,255,0.11202133695284526
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,255,0.14168000221252441
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,255,0.12103999654452006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,255,0.1051680048306783
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,255,0.10400533676147461
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,255,0.10332799951235454
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,511,0.17840532461802164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,511,0.14814399679501852
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,511,0.14074132839838663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,511,0.15755732854207358
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,511,0.13902399937311807
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,511,0.13782399892807007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,511,0.14351999759674072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,511,0.1787466605504354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,511,0.1571466624736786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,511,0.14396799604098
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,511,0.14822933077812195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,511,0.13897599776585898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,511,0.1410719950993856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,511,0.13851733009020487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,1023,0.22537066539128622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,1023,0.21566933393478394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,1023,0.2481493353843689
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,1023,0.2110933264096578
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,1023,0.20800000429153442
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,1023,0.2060533364613851
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,1023,0.20561067263285318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,1023,0.24707732597986856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,1023,0.22591465711593628
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,1023,0.21613333622614542
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,1023,0.2062986691792806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,1023,0.2074079910914103
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,1023,0.20548800627390543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,1023,0.2109546661376953
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,2047,0.37911466757456463
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,2047,0.35766398906707764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,2047,0.34865065415700275
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,2047,0.3431413173675537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,2047,0.340175986289978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,2047,0.33866135279337567
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,2047,0.33771733442942303
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,2047,0.3777546485265096
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,2047,0.3577440182367961
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,2047,0.34808532396952313
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,2047,0.3445599873860677
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,2047,0.34060800075531006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,2047,0.33828266461690265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,2047,0.3380693197250366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,4095,0.6817493438720703
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,4095,0.6301120122273763
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,4095,0.6143519878387451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,4095,0.6092373530069987
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,4095,0.6501386562983195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,4095,0.6017599900563558
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,4095,0.6027893225351969
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,4095,0.6805866559346517
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,4095,0.6486826737721761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,4095,0.607589324315389
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,4095,0.6149706840515137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,4095,0.628874659538269
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,4095,0.6025813420613607
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,4095,0.6033866802851359
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,8191,1.2314613660176594
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,8191,1.1870400110880535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,8191,1.1605760256449382
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,8191,1.1463359991709392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,8191,1.1346773306528728
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,8191,1.1318986415863037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,8191,1.1280639966328938
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,8191,1.2316266695658367
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,8191,1.1872373421986897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,8191,1.1623093287150066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,8191,1.1476853688557942
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,8191,1.131989320119222
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,8191,1.1361973285675049
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,8191,1.1295359929402669
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,16383,2.2946507136027017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,16383,2.2470399538675943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,16383,2.213263988494873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,16383,2.1942292849222818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,16383,2.1795360247294107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,16383,2.1852639516194663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,16383,2.1910452842712402
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,16383,2.2909599939982095
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,16383,2.245744069417318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,16383,2.198186715443929
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,16383,2.2162559827168784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,16383,2.1868693033854165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,16383,2.1812853813171387
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,16383,2.1793600718180337
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,2,32767,4.354938824971517
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,1,32767,4.409589449564616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,4,32767,4.320261319478353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,8,32767,4.298010508219401
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,512,1,64,32767,4.273818651835124
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,1,32767,4.409781455993652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,32,32767,4.278346697489421
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,1,0.19192000230153403
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,1,0.16221867005030313
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,16,32767,4.284618695576985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,1,0.1482080022493998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,1,0.13684266805648804
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,1,0.13897599776585898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,2,32767,4.354640007019043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,1,0.13525866468747458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,1,0.13428266843159994
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,4,32767,4.318394660949707
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,1,0.19130132595698038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,1,0.1483840048313141
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,1,0.16226133704185486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,1,0.13737600048383078
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,1,0.1393226683139801
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,1,0.134853333234787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,1,0.13525333007176718
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,3,0.1604586640993754
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,3,0.1461066703001658
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,3,0.18996266523996988
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,3,0.13779200116793314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,3,0.13486933708190918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,3,0.13317867120107016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,3,0.13353066643079123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,3,0.1600266695022583
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,3,0.18959466616312662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,3,0.14586666226387024
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,8,32767,4.300234794616699
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,3,0.13803733388582864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,512,1,64,32767,4.277413368225098
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,3,0.13478400309880575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,3,0.1327946682771047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,3,0.13351466258366904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,16,32767,4.287760098775228
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,7,0.1888373295466105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,7,0.13705066839853922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,32,32767,4.278661410013835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,7,0.14475733041763306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,7,0.15890133380889893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,7,0.13175466656684875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,7,0.13333866993586221
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,7,0.13117333253224692
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,7,0.14474133650461832
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,7,0.1590559979279836
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,7,0.13607466220855713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,7,0.18892266352971396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,7,0.1327839990456899
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,7,0.13211199641227722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,7,0.13081600268681845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,15,0.14323733250300089
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,15,0.13378133376439413
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,15,0.15666133165359497
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,15,0.12889599800109863
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,15,0.1304853359858195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,15,0.18791999419530234
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,15,0.1285599966843923
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,15,0.18789867560068765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,15,0.12939199805259705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,15,0.1302293340365092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,15,0.13360533118247986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,15,0.14290666580200195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,15,0.15728533267974854
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,15,0.1288373370965322
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,31,0.153519997994105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,31,0.18664000431696573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,31,0.1395039955774943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,31,0.1304639975229899
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,31,0.12773866454760233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,31,0.12653332948684692
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,31,0.12476266423861186
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,31,0.18650666872660318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,31,0.12743467092514038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,31,0.12517333030700684
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,31,0.1534826656182607
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,31,0.12567466497421265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,31,0.13896000385284424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,31,0.13134933511416116
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,63,0.19010132551193237
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,63,0.12965333461761475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,63,0.13869866728782654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,63,0.15587733189264932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,63,0.12594667077064514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,63,0.1239520013332367
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,63,0.12321066856384277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,63,0.15562666455904642
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,63,0.12590400377909342
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,63,0.18922666708628336
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,63,0.13821867108345032
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,63,0.12301866213480632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,63,0.12386666735013326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,63,0.12965333461761475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,127,0.1550826629002889
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,127,0.2095200022061666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,127,0.1731520096460978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,127,0.14217066764831543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,127,0.13990400234858194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,127,0.1385546624660492
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,127,0.1460853318373362
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,127,0.2092426617940267
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,127,0.14242666959762573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,127,0.14550933241844177
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,127,0.17260799805323282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,127,0.14007467031478882
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,127,0.13860266407330832
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,127,0.1553813318411509
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,255,0.1803893248240153
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,255,0.176581343015035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,255,0.2429973284403483
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,255,0.1738133430480957
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,255,0.19019200404485068
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,255,0.20682666699091592
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,255,0.1722453236579895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,255,0.24316267172495523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,255,0.18036800622940063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,255,0.17637866735458374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,255,0.18954133987426758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,255,0.207370658715566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,255,0.17281067371368408
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,255,0.17413334051767984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,511,0.31918400526046753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,511,0.26077866554260254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,511,0.24567999442418417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,511,0.25015999873479206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,511,0.24180267254511514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,511,0.2414720058441162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,511,0.2786453366279602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,511,0.3192800084749858
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,511,0.2422773241996765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,511,0.2603786587715149
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,511,0.24539732933044434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,511,0.24039467175801596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,511,0.2504799962043762
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,511,0.2789600094159444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,1023,0.41385066509246826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,1023,0.3855733474095662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,1023,0.378762682278951
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,1023,0.37569598356882733
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,1023,0.37204798062642414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,1023,0.3941226800282796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,1023,0.45536001523335773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,1023,0.4556320110956828
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,1023,0.3969279925028483
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,1023,0.41394134362538654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,1023,0.3726133505503337
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,1023,0.3844746748606364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,1023,0.3749493360519409
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,1023,0.37861335277557373
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,2047,0.69923202196757
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,2047,0.7446613311767578
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,2047,0.6700106461842855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,2047,0.6511733531951904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,2047,0.6307253440221151
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,2047,0.6357440153757731
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,2047,0.6420213381449381
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,2047,0.7426453431447347
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,2047,0.6997439861297607
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,2047,0.6519733270009359
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,2047,0.6419626474380493
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,2047,0.6701653003692627
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,2047,0.6320799986521403
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,2047,0.6335093180338541
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,4095,1.312165339787801
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,4095,1.2502293586730957
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,4095,1.2091680367787678
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,4095,1.1723306973775227
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,4095,1.187007983525594
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,4095,1.159989356994629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,4095,1.1638879776000977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,4095,1.3090399901072185
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,4095,1.2494080066680908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,4095,1.209114631017049
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,4095,1.1716907024383545
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,4095,1.1867093245188396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,4095,1.1660373210906982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,4095,1.161072015762329
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,8191,2.3168160120646157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,8191,2.3923840522766113
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,8191,2.272693316141764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,8191,2.242677370707194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,8191,2.2262399991353354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,8191,2.2141812642415366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,8191,2.209109306335449
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,8191,2.3963146209716797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,8191,2.3166240056355796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,8191,2.271578629811605
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,8191,2.2433973948160806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,8191,2.2236000696818032
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,8191,2.2163893381754556
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,8191,2.2108160654703775
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,1,16383,4.523770650227864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,2,16383,4.436229387919108
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,4,16383,4.380144119262695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,32,16383,4.312080065409343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,16,16383,4.332031885782878
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,8,16383,4.341744105021159
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,1,1024,1,64,16383,4.314847946166992
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,1,16383,4.518351872762044
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,1,0.02254933367172877
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,1,0.02661866694688797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,1,0.02733866622050603
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,1,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,1,0.02569066733121872
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,1,0.025733334322770435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,1,0.02565866708755493
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,1,0.02250133454799652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,1,0.02681066592534383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,1,0.027258666853109997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,1,0.02640533447265625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,1,0.02573866645495097
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,1,0.025936000049114227
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,1,0.025487999121348064
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,3,0.0223786657055219
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,3,0.027002667387326557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,3,0.026554666459560394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,3,0.025813333690166473
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,3,0.025941332181294758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,3,0.025770666698614757
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,3,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,3,0.022346665461858112
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,3,0.027087998886903126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,3,0.026682667434215546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,3,0.026021334032217663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,3,0.02593066543340683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,3,0.025861332813898723
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,3,0.02554133286078771
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,7,0.022175999979178112
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,7,0.02700799951950709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,7,0.02640533447265625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,7,0.02573866645495097
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,7,0.025663999219735462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,7,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,7,0.025514667232831318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,7,0.027290667096773785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,7,0.021733333667119343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,7,0.026426665484905243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,7,0.0259253333012263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,7,0.025775998830795288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,7,0.025407999753952026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,7,0.02568000058333079
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,15,0.0215786670645078
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,15,0.02701866626739502
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,15,0.025978667040665943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,15,0.025642665723959606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,15,0.02526933451493581
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,15,0.02516799916823705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,15,0.025205334027608235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,15,0.021429332594076794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,15,0.027072000006834667
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,15,0.02589333305756251
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,15,0.025781333446502686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,15,0.02510400116443634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,15,0.025386666258176167
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,4,16383,4.379994710286458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,31,0.02139200021823247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,15,0.025072000920772552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,31,0.02611200014750163
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,16,16383,4.3282772699991865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,31,0.02518933266401291
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,31,0.026613332331180573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,31,0.025098666548728943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,31,0.024832000335057575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,31,0.024885334074497223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,31,0.021221332252025604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,31,0.026687999566396076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,31,0.025685332715511322
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,31,0.025466665625572205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,31,0.024735999604066212
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,31,0.025087999800841015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,31,0.024874667326609295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,63,0.020181333025296528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,63,0.025487999121348064
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,63,0.024885334074497223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,8,16383,4.34767468770345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,32,16383,4.319066683451335
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,63,0.02404266595840454
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,63,0.02405333270629247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,63,0.0240639994541804
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,63,0.023775999744733173
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,63,0.025424001117547352
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,63,0.020106667031844456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,63,0.02460266649723053
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,63,0.024160000185171764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,63,0.023951999843120575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,63,0.02459733436505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,63,0.02399466683467229
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,127,0.02826133370399475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,127,0.027306665976842243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,127,0.02657066782315572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,127,0.023210667073726654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,127,0.026000000536441803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,127,0.025648000339667004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,127,0.02333866556485494
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,127,0.02589866767326991
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,127,0.028405333558718365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,127,0.026538667579491932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,127,0.02587733417749405
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,127,0.025818665822347004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,127,0.027002667387326557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,127,0.02571733295917511
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,255,0.02327466756105423
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,255,0.02847466617822647
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,255,0.027119999130566914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,255,0.02664000044266383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,255,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,255,0.026021334032217663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,255,0.02316266546646754
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,255,0.02849599967400233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,255,0.026362667481104534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,255,0.027162666122118633
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,255,0.02661866694688797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,255,0.02610666553179423
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,255,0.025989333788553875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,255,0.025946666797002155
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,511,0.02380799998839696
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,511,0.02926933268706004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,511,0.02779199928045273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,511,0.027285332481066387
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,511,0.026608000199000042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,1,1024,1,64,16383,4.310346603393555
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,511,0.02659733345111211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,511,0.024005333582560223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,511,0.029050665597120922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,511,0.027615999182065327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,511,0.027280000348885853
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,511,0.026538667579491932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,511,0.026714667677879333
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,511,0.02679466704527537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,1023,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,511,0.026752000053723652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,1023,0.028783999383449554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,1023,0.028170667588710785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,1023,0.03014400104681651
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,1023,0.027829334139823914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,1023,0.027888000011444092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,1023,0.02771199991305669
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,1023,0.02553066611289978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,1023,0.030085332691669464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,1023,0.02759466568628947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,1023,0.02788266787926356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,1023,0.02829866607983907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,1023,0.028543998797734577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,1023,0.027690666417280834
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,2047,0.033029332756996155
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,2047,0.027855999767780304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,2047,0.03145600110292435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,2047,0.030618667602539062
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,2047,0.03012266755104065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,2047,0.030533333619435627
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,2047,0.028031999866167705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,2047,0.03281066566705704
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,2047,0.030095999439557392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,2047,0.03141866624355316
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,2047,0.030661332110563915
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,2,16383,4.4329226811726885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,2047,0.030405332644780476
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,2047,0.030405332644780476
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,2047,0.033770665526390076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,4095,0.02951466788848241
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,4095,0.03482133398453394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,4095,0.035877334574858345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,4095,0.037418665985266365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,4095,0.034527999659379326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,4095,0.03437866767247518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,4095,0.03449599941571554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,4095,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,4095,0.040133332212766014
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,4095,0.03581333408753077
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,4095,0.03438399980465571
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,4095,0.03436800092458725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,4095,0.035402665535608925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,4095,0.03453866640726725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,8191,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,8191,0.037946666280428566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,8191,0.033471999069054924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,8191,0.03681600093841553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,8191,0.036506667733192444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,8191,0.03654933224121729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,8191,0.033413333197434746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,8191,0.03899733225504557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,8191,0.03792533278465271
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,8191,0.03659733384847641
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,8191,0.036730666955312095
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,8191,0.03655466685692469
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,8191,0.03700266778469086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,8191,0.03643200049797694
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,16383,0.038575999438762665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,16383,0.048453330993652344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,16383,0.04638933142026266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,16383,0.045066664616266884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,16383,0.044394666949907936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,16383,0.04470933477083842
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,16383,0.03923200070858002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,16383,0.04839999973773956
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,16383,0.044591998060544334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,16383,0.04650666813055674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,16383,0.04510400195916494
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,16383,0.04450666904449463
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,16383,0.04454400142033895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,32767,0.04888000090916952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,32767,0.05658133327960968
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,32767,0.049039999643961586
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,32767,0.04915733138720194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,16383,0.04424533247947693
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,32767,0.04924799998601278
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,32767,0.05113600194454193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,32767,0.04804266492525736
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,32767,0.048858667413393654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,32767,0.05622933308283488
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,32767,0.048986668388048805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,32767,0.05173333485921224
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,32767,0.04840533435344696
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,65535,0.06306666632493337
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,65535,0.06749866902828217
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,32767,0.04814399778842926
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,32767,0.048394665122032166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,65535,0.06446933249632518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,65535,0.062122667829195656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,65535,0.06312533219655354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,65535,0.061759998401006065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,65535,0.06331199904282887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,65535,0.06285866598288219
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,65535,0.0620000014702479
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,65535,0.06769600013891856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,65535,0.06258133550484975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,65535,0.06481066842873891
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,65535,0.061887999375661217
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,65535,0.06180266539255778
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1,1,1,131071,0.09083200494448344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1,1,4,131071,0.08201600114504497
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1,1,16,131071,0.08021866778532664
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1,1,2,131071,0.08622399965922038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1,1,64,131071,0.0792799989382426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1,1,1,131071,0.0909440020720164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1,1,8,131071,0.08073066671689351
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1,1,2,131071,0.08681066830952962
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1,1,32,131071,0.0790880024433136
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1,1,8,131071,0.08044800162315369
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1,1,4,131071,0.0819893330335617
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1,1,32,131071,0.07976000010967255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1,1,16,131071,0.07945066690444946
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1,1,64,131071,0.07886933286984761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,1,0.02756800005833308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,1,0.022522665560245514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,1,0.026517334083716076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,1,0.02658133457104365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,1,0.025983999172846477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,1,0.025962665677070618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,1,0.025909334421157837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,1,0.022543999056021374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,1,0.027610667049884796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,1,0.026506667335828144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,1,0.025914666553338368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,1,0.025888000925381977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,1,0.026789332429567974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,3,0.023525332411130268
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,1,0.026005332668622334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,3,0.027215999861558277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,3,0.02661866694688797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,3,0.026176000634829204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,3,0.02587733417749405
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,3,0.02584533393383026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,3,0.022469334304332733
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,3,0.026538667579491932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,3,0.0258240004380544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,3,0.02628266563018163
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,3,0.044010668992996216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,3,0.025914666553338368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,3,0.025765334566434223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,3,0.02586666742960612
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,7,0.022042666872342426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,7,0.027104000250498455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,7,0.025557334224383037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,7,0.025957333544890087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,7,0.025648000339667004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,7,0.026501332720120747
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,7,0.027237333357334137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,7,0.02214933435122172
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,7,0.025626666843891144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,7,0.026485333840052288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,7,0.025754667818546295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,7,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,15,0.026159999271233875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,15,0.027002667387326557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,15,0.021898667017618816
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,15,0.025663999219735462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,7,0.025461333493391674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,7,0.02550933261712392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,15,0.025424001117547352
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,15,0.025477332373460133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,15,0.025205334027608235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,15,0.02186666677395503
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,15,0.02693866689999898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,15,0.026127999027570088
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,15,0.025455998877684276
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,15,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,15,0.025600001215934753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,15,0.025370667378107708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,31,0.021402666966120403
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,31,0.026762666801611584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,31,0.025792000194390614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,31,0.028218666712443035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,31,0.025477332373460133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,31,0.02513599892457326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,31,0.02677333354949951
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,31,0.025040000677108765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,31,0.02593066543340683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,31,0.025477332373460133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,31,0.02534399926662445
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,31,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,31,0.025407999753952026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,31,0.025205334027608235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,63,0.0204373337328434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,63,0.02478933334350586
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,63,0.024336000283559162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,63,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,63,0.023930666347344715
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,63,0.023989332218964893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,63,0.023989332218964893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,63,0.025568000972270966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,63,0.020453333854675293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,63,0.02473066747188568
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,63,0.024304000039895374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,63,0.024005333582560223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,63,0.02390933285156886
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,127,0.028853334486484528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,63,0.023973333338896435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,127,0.02754133443037669
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,127,0.02348266790310542
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,127,0.0269813338915507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,127,0.026208000878492992
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,127,0.02629333237806956
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,127,0.02681066592534383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,127,0.02346666653951009
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,127,0.027290667096773785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,127,0.02899733434120814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,127,0.026757332185904186
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,127,0.026538667579491932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,127,0.02640533447265625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,127,0.02625600000222524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,255,0.023610666394233704
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,255,0.029045333464940388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,255,0.027610667049884796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,255,0.026575999955336254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,255,0.027109332382678986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,255,0.02664533257484436
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,255,0.023728000621000927
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,255,0.02739733209212621
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,255,0.028912000358104706
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,255,0.02659733345111211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,255,0.026933332284291584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,255,0.02693866689999898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,255,0.026746665438016255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,511,0.025146665672461193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,511,0.029951999584833782
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,255,0.026608000199000042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,511,0.028394666810830433
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,511,0.02756800005833308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,511,0.02741333345572154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,511,0.028016000986099243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,511,0.029893333713213604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,511,0.025226667523384094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,511,0.02844800055027008
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,511,0.027274665733178455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,511,0.027850667635599773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,511,0.027562665442625683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,511,0.027317332724730175
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,1023,0.027221334477265675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,1023,0.03236799935499827
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,1023,0.030789333085219067
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,511,0.027295999228954315
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,1023,0.029461334149042766
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,1023,0.02940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,1023,0.029882666965325672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,1023,0.029338667790095013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,1023,0.02717866748571396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,1023,0.03222399950027466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,1023,0.029440000653266907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,1023,0.03009066730737686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,1023,0.02940266579389572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,2047,0.028917332490285236
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,1023,0.029232000311215717
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,2047,0.035760000348091125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,1023,0.030784000953038532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,2047,0.033930666744709015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,2047,0.032933334509531655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,2047,0.032442666590213776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,2047,0.0322773332397143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,2047,0.035631999373435974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,2047,0.03266666581233343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,2047,0.0342399999499321
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,2047,0.02956266701221466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,2047,0.032746667663256325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,2047,0.03236266722281774
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,2047,0.032672000428040825
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,4095,0.03206400076548258
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,2047,0.032101333141326904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,4095,0.03590933233499527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,4095,0.03443199892838796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,4095,0.03749333322048187
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,4095,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,4095,0.03468266626199087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,4095,0.03514666606982549
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,4095,0.03193599979082743
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,4095,0.03748800108830134
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,4095,0.03578133384386698
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,4095,0.0348693331082662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,4095,0.03446399917205175
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,4095,0.03474666674931844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,8191,0.04197866717974345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,8191,0.04020266731580099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,4095,0.03480000048875809
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,8191,0.03756800045569738
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,8191,0.03949866692225138
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,8191,0.03934400031963984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,8191,0.038917332887649536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,8191,0.03913066784540812
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,8191,0.03685333331425985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,8191,0.04188799858093262
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,8191,0.039994666973749794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,8191,0.03899733225504557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,8191,0.04049066702524821
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,16383,0.047872001926104225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,8191,0.038831998904546104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,8191,0.03907199949026108
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,16383,0.05313600103060404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,16383,0.04824000100294749
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,16383,0.04507199923197428
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,16383,0.04613333443800608
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,16383,0.04493333399295807
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,16383,0.04730666677157084
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,16383,0.05297600229581197
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,16383,0.048394665122032166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,16383,0.04497066636880239
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,16383,0.04685866832733154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,16383,0.044938668608665466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,16383,0.045226668318112694
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,16383,0.04486933350563049
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,32767,0.06126399834950765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,32767,0.0639519989490509
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,32767,0.0609386662642161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,32767,0.05881600081920624
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,32767,0.059877331058184304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,32767,0.05834133426348368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,32767,0.058362667759259544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,32767,0.05970133344332377
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,32767,0.060959999759991966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,32767,0.06439466774463654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,32767,0.0610346645116806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,32767,0.058789332707722984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,32767,0.057989334066708885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,65535,0.08956799904505412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,32767,0.0584746648867925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,65535,0.08331733445326488
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,65535,0.07630399862925212
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,65535,0.07885866860548656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,65535,0.07753600180149078
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,65535,0.07534933090209961
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,65535,0.07541333138942719
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,65535,0.08960533142089844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,65535,0.07962666451931
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,65535,0.08268266419569652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,65535,0.07574399809042613
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,65535,0.07538666824499766
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,65535,0.07578133543332417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,65535,0.07769600053628285
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,2,1,2,131071,0.11700800061225891
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,2,1,1,131071,0.1439306636651357
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,2,1,4,131071,0.11401066184043884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,2,1,16,131071,0.10991467038790385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,2,1,8,131071,0.11137066284815471
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,2,1,2,131071,0.11674132943153381
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,2,1,4,131071,0.11271466811498006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,2,1,32,131071,0.10998933513959248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,2,1,1,131071,0.14333867033322653
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,2,1,64,131071,0.10821866989135742
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,2,1,8,131071,0.11106133460998535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,2,1,16,131071,0.10995733737945557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,2,1,64,131071,0.10971200466156006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,1,0.022991999983787537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,1,0.027984000742435455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,1,0.027306665976842243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,1,0.026608000199000042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,1,0.026416001220544178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,2,1,32,131071,0.11030933260917664
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,1,0.0262719988822937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,1,0.02628266563018163
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,1,0.02298133323589961
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,1,0.0278613343834877
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,1,0.026677332818508148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,1,0.02626666675011317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,1,0.027263998985290527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,1,0.0262773334980011
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,1,0.026191999514897663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,3,0.022805333137512207
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,3,0.027893332143624622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,3,0.027098665634791057
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,3,0.02657066782315572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,3,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,3,0.026208000878492992
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,3,0.027776000400384266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,3,0.022757334013779957
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,3,0.02657066782315572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,3,0.027215999861558277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,3,0.026208000878492992
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,3,0.02606933315594991
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,3,0.026101333399613697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,7,0.02276266614596049
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,7,0.027621333797772724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,3,0.02611733227968216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,7,0.026917333404223125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,7,0.026528000831604004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,7,0.025941332181294758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,7,0.025888000925381977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,7,0.02604266752799352
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,7,0.022677332162857056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,7,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,7,0.02629333237806956
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,7,0.026885333160559338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,7,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,7,0.025818665822347004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,7,0.025861332813898723
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,15,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,15,0.022319999833901722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,15,0.026522666215896606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,15,0.026149332523345947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,15,0.025749333202838898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,15,0.02565866708755493
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,15,0.025637333591779072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,15,0.02223466585079829
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,15,0.027456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,15,0.02610666553179423
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,15,0.025786665578683216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,15,0.025792000194390614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,31,0.021642667551835377
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,15,0.026389333109060924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,15,0.025744001070658367
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,31,0.026922665536403656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,31,0.026613332331180573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,31,0.02593066543340683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,31,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,31,0.025258667767047882
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,31,0.025290665527184803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,31,0.025775998830795288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,31,0.02717333287000656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,31,0.026341333985328674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,31,0.021727999051411945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,31,0.025589334468046825
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,31,0.0252960001428922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,31,0.025418666501839954
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,63,0.020799999435742695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,63,0.025994665920734406
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,63,0.024688000480333965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,63,0.025424001117547352
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,63,0.02443733314673106
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,63,0.024336000283559162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,63,0.024325333535671234
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,63,0.020928000410397846
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,63,0.025983999172846477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,63,0.02532266577084859
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,63,0.024698667228221893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,63,0.024458666642506916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,63,0.024432001014550526
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,63,0.024282666544119518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,127,0.02920000006755193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,127,0.02794666588306427
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,127,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,127,0.024069334069887798
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,127,0.026906666656335194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,127,0.026799999177455902
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,127,0.026917333404223125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,127,0.023951999843120575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,127,0.02804800122976303
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,127,0.029205332199732464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,127,0.027482666075229645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,127,0.0268053337931633
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,127,0.02683199942111969
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,127,0.026549334327379864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,255,0.029487999776999157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,255,0.028250666956106823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,255,0.025349333882331848
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,255,0.02770666778087616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,255,0.02757333219051361
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,255,0.02700799951950709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,255,0.025194667279720306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,255,0.029711998999118805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,255,0.028410665690898895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,255,0.027215999861558277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,255,0.027349332968393963
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,255,0.027290667096773785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,255,0.02769600103298823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,255,0.027130665878454845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,511,0.02712533374627431
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,511,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,511,0.03179199993610382
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,511,0.029717333614826202
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,511,0.028789333999156952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,511,0.02886933336655299
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,511,0.029338667790095013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,511,0.02720000098148982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,511,0.03044266750415166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,511,0.03189333279927572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,511,0.029018667836983997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,511,0.02956266701221466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,511,0.028789333999156952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,511,0.028949332733949024
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,1023,0.02905600021282832
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,1023,0.033045334120591484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,1023,0.031770666440327965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,1023,0.03489600121974945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,1023,0.03209600100914637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,1023,0.031471999982992806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,1023,0.031770666440327965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,1023,0.03312533348798752
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,1023,0.03483733286460241
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,1023,0.02959999938805898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,1023,0.05579733351866404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,1023,0.03209600100914637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,1023,0.031445334355036415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,1023,0.03147733211517334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,2047,0.03183466692765554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,2047,0.036602665980656944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,2047,0.03381866713364919
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,2047,0.03506666670242945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,2047,0.03370666752258936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,2047,0.034202667574087776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,2047,0.03344533344109853
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,2047,0.03179199993610382
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,2047,0.03649600098530451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,2047,0.03488533447186152
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,2047,0.034128000338872276
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,2047,0.03340800106525421
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,2047,0.033930666744709015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,4095,0.03696000079313914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,4095,0.040549332896868386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,2047,0.03369066615899404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,4095,0.03904533386230469
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,4095,0.03806933263937632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,4095,0.038245332737763725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,4095,0.04044266790151596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,4095,0.0376800000667572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,4095,0.0377813329299291
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,4095,0.036389333506425224
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,4095,0.03950933367013931
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,4095,0.03827200084924698
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,4095,0.038015998899936676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,4095,0.0379573330283165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,4095,0.03765333443880081
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,8191,0.04721599817276001
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,8191,0.04558933277924856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,8191,0.0428959975639979
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,8191,0.04308266441027323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,8191,0.05003199974695841
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,8191,0.04389866689840952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,8191,0.04279466470082601
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,8191,0.0472320020198822
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,8191,0.0487360010544459
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,8191,0.045509333411852516
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,8191,0.043322667479515076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,8191,0.04387199878692627
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,8191,0.04292800029118856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,16383,0.06160533428192139
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,8191,0.04337066908677419
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,16383,0.0627040018637975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,16383,0.05895466605822245
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,16383,0.057317331433296204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,16383,0.05671999851862589
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,16383,0.05638400216897329
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,16383,0.05583466589450836
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,16383,0.06142933170000712
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,16383,0.05884266893068949
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,16383,0.05709866682688395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,16383,0.056554665168126426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,16383,0.05633600056171417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,16383,0.06266133487224579
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,16383,0.05605333546797434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,32767,0.08807466427485149
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,32767,0.0809386670589447
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,32767,0.07694399853547414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,32767,0.07519466678301494
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,32767,0.07446933289368947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,32767,0.07427733143170674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,32767,0.08918933073679607
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,32767,0.07364800075689952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,32767,0.07726400097211202
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,32767,0.075573335091273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,32767,0.08075200021266937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,32767,0.0746559997399648
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,32767,0.07403199871381123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,32767,0.07381333410739899
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,65535,0.14408533771832785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,65535,0.11053333679835002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,65535,0.10893332958221436
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,65535,0.10749333103497823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,65535,0.11489066481590271
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,65535,0.10682666301727295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,65535,0.10803733269373576
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,65535,0.14324266711870828
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,65535,0.11449600259462993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,65535,0.11141866445541382
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,65535,0.10878400007883708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,65535,0.10787733395894368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,4,1,2,131071,0.1834026575088501
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,4,1,1,131071,0.2500266631444295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,65535,0.10843200484911601
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,4,1,4,131071,0.1783626675605774
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,65535,0.10738666852315266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,4,1,8,131071,0.17598400513331094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,4,1,16,131071,0.17512534062067667
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,4,1,32,131071,0.17497066656748453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,4,1,64,131071,0.17428799470265707
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,4,1,1,131071,0.25040000677108765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,4,1,4,131071,0.17923200130462646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,4,1,2,131071,0.1830079952875773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,4,1,8,131071,0.17591466506322226
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,4,1,16,131071,0.1746506690979004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,4,1,64,131071,0.17427200078964233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,4,1,32,131071,0.17403733730316162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,1,0.023823998868465424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,1,0.027823999524116516
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,1,0.027424000203609467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,1,0.028618666032950085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,1,0.026816000541051228
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,1,0.026848000784715016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,1,0.026789332429567974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,1,0.023904000719388325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,1,0.02870933214823405
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,1,0.0276853342851003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,1,0.027189334233601887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,1,0.02685333291689555
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,1,0.026799999177455902
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,3,0.023567999402681988
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,3,0.02775999903678894
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,3,0.0284853329261144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,1,0.026890667776266735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,3,0.027263998985290527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,3,0.02683199942111969
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,3,0.026719999810059864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,3,0.026629333694775898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,3,0.023562667270501454
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,3,0.027637332677841187
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,3,0.028624000648657482
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,3,0.026778665681680042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,3,0.027221334477265675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,3,0.026693334182103474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,3,0.026880001028378803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,7,0.023333333432674408
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,7,0.0283146674434344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,7,0.02719466636578242
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,7,0.02659733345111211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,7,0.027456000447273254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,7,0.023503998915354412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,7,0.02824000020821889
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,7,0.026591998835404713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,7,0.026543999711672466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,7,0.027210667729377747
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,7,0.027215999861558277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,7,0.026501332720120747
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,7,0.026575999955336254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,7,0.026682667434215546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,15,0.023205332458019257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,15,0.02619733413060506
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,15,0.026933332284291584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,15,0.02624533325433731
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,15,0.027978666126728058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,15,0.026933332284291584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,15,0.0260959987839063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,15,0.023397333920001984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,15,0.027808000644048054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,15,0.026314665873845417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,15,0.026730666557947796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,15,0.026911998788515728
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,15,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,31,0.02275199939807256
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,31,0.02771199991305669
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,15,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,31,0.026752000053723652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,31,0.02626666675011317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,31,0.025941332181294758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,31,0.025807999074459076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,31,0.022709332406520844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,31,0.0258240004380544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,31,0.02611733227968216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,31,0.02645866572856903
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,31,0.027493332823117573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,31,0.026650667190551758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,31,0.025888000925381977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,31,0.025775998830795288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,63,0.026575999955336254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,63,0.021909333765506744
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,63,0.02535466601451238
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,63,0.024986666937669117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,63,0.02587733417749405
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,63,0.02179199953873952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,63,0.025013332565625507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,63,0.024800000091393787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,63,0.026608000199000042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,63,0.025749333202838898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,63,0.024826665719350178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,63,0.025370667378107708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,127,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,63,0.024842667082945507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,63,0.02808533360560735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,127,0.028736000259717304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,127,0.030031998952229817
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,127,0.028223998844623566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,127,0.02762666592995326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,127,0.02794133375088374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,127,0.027658666173617046
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,127,0.025648000339667004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,127,0.030000001192092896
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,127,0.028698667883872986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,127,0.027658666173617046
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,127,0.027845333019892376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,127,0.02754666656255722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,127,0.028304000695546467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,255,0.027386667827765148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,255,0.03182400017976761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,255,0.03046933313210805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,255,0.02962133288383484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,255,0.029258665939172108
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,255,0.028864001234372456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,255,0.02882133424282074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,255,0.032069332897663116
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,255,0.030368000268936157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,255,0.027477333943049114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,255,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,255,0.029706666866938274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,255,0.02884799987077713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,255,0.028581333657105763
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,511,0.03456533451875051
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,511,0.02922133356332779
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,511,0.03287466615438461
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,511,0.03158933420976003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,511,0.03130666663249334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,511,0.03202133377393087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,511,0.031343999008337654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,511,0.029071999092896778
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,511,0.03461866577466329
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,511,0.03296533226966858
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,511,0.03190399954716364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,511,0.03154666721820831
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,511,0.03130666663249334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,1023,0.03659733384847641
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,1023,0.03457066665093104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,1023,0.03328000009059906
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,511,0.031109333038330078
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,1023,0.033386667569478355
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,1023,0.04331733286380768
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,1023,0.033376000821590424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,1023,0.03381866713364919
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,1023,0.03266133368015289
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,1023,0.03643733263015747
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,1023,0.03338133295377096
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,1023,0.033402666449546814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,1023,0.0332640012105306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,1023,0.03507733345031738
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,1023,0.03386666625738144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,2047,0.03718400001525879
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,2047,0.0408693328499794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,2047,0.03786666691303253
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,2047,0.03864533454179764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,2047,0.03752533346414566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,2047,0.03731200098991394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,2047,0.03775466730197271
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,2047,0.040805332362651825
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,2047,0.037477334340413414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,2047,0.038805333276589714
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,2047,0.03781333317359289
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,2047,0.037765334049860634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,2047,0.03717333326737086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,2047,0.037392000357309975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,4095,0.048954665660858154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,4095,0.047210668524106346
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,4095,0.04372799893220266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,4095,0.043375998735427856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,4095,0.04223466912905375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,4095,0.04486933350563049
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,4095,0.04186133543650309
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,4095,0.0492799977461497
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,4095,0.047824000318845115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,4095,0.045279999574025474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,4095,0.043791999419530235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,4095,0.04343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,4095,0.04230933388074239
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,8191,0.06283733248710632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,8191,0.06195733447869619
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,8191,0.058090666929880776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,4095,0.04195733368396759
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,8191,0.05606933434804281
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,8191,0.057114665706952415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,8191,0.05552533268928528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,8191,0.05563733478387197
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,8191,0.06300800045331319
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,8191,0.06208533545335134
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,8191,0.05850133299827576
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,8191,0.05602133274078369
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,8191,0.055493334929148354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,8191,0.056703999638557434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,16383,0.08101866642634074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,16383,0.08859200278917949
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,8191,0.05560533205668131
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,16383,0.07632000247637431
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,16383,0.07416533430417378
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,16383,0.07317866881688435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,16383,0.07314666608969371
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,16383,0.0895146628220876
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,16383,0.0746613343556722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,16383,0.080485333998998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,16383,0.0726453314224879
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,16383,0.07610666751861572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,16383,0.07354133327802022
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,16383,0.07285333176453908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,16383,0.07284266750017802
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,32767,0.11454400420188904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,32767,0.14350933829943338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,32767,0.10962667067845662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,32767,0.10678399602572124
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,32767,0.1074026624361674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,32767,0.10800000031789143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,32767,0.1067680021127065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,32767,0.14282133181889853
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,32767,0.11530133088429768
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,32767,0.11001066366831462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,32767,0.10860266288121541
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,32767,0.10771200060844421
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,32767,0.107424000898997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,65535,0.25012266635894775
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,65535,0.18580265839894614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,65535,0.17946666479110718
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,65535,0.1767680048942566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,32767,0.10669333736101787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,65535,0.1774453322092692
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,65535,0.17612266540527344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,65535,0.25013866027196247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,65535,0.18598934014638266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,65535,0.17730667193730673
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,65535,0.1750719944636027
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,65535,0.17920533816019693
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,65535,0.17646400133768717
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,65535,0.17613865931828818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,65535,0.17697066068649292
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,8,1,4,131071,0.3124000032742818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,8,1,1,131071,0.4644639889399211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,8,1,2,131071,0.31727466980616253
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,8,1,16,131071,0.30715733766555786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,8,1,32,131071,0.30583999554316205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,8,1,8,131071,0.3083893259366353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,8,1,64,131071,0.3062826593716939
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,8,1,2,131071,0.31708266337712604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,8,1,1,131071,0.4641600052515666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,8,1,4,131071,0.3102666735649109
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,8,1,16,131071,0.30692267417907715
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,8,1,8,131071,0.3083893259366353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,8,1,32,131071,0.30561600128809613
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,8,1,64,131071,0.30613332986831665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,1,0.02613866577545802
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,1,0.02994133283694585
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,1,0.028586665789286297
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,1,0.028165332973003387
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,1,0.02787200113137563
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,1,0.02787200113137563
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,1,0.029018667836983997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,1,0.030213333666324615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,1,0.026127999027570088
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,1,0.028981332977612812
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,1,0.028160000840822857
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,1,0.02834133307139079
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,1,0.02770666778087616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,1,0.027893332143624622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,3,0.028245332340399425
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,3,0.029813334345817566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,3,0.026101333399613697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,3,0.03151999910672506
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,3,0.028837333122889202
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,3,0.027845333019892376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,3,0.02770666778087616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,3,0.02607999990383784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,3,0.02977066735426585
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,3,0.02884799987077713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,3,0.028143999477227528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,3,0.046485334634780884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,7,0.026000000536441803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,3,0.027744000156720478
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,7,0.029690665503342945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,3,0.028533334533373516
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,7,0.028437333802382152
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,7,0.027914665639400482
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,7,0.028016000986099243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,7,0.0276853342851003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,7,0.02759466568628947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,7,0.029706666866938274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,7,0.02607999990383784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,7,0.02810666710138321
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,7,0.02869333326816559
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,7,0.02794133375088374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,7,0.027535999814669292
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,7,0.027642667293548584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,15,0.025637333591779072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,15,0.029359998802344005
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,15,0.0284853329261144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,15,0.027669332921504974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,15,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,15,0.027285332481066387
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,15,0.025610665480295818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,15,0.02959466725587845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,15,0.02769600103298823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,15,0.028416000306606293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,15,0.027466667195161183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,15,0.030559999247392017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,15,0.02807466685771942
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,15,0.02714666724205017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,31,0.025040000677108765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,31,0.0288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,31,0.027285332481066387
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,31,0.026922665536403656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,31,0.027850667635599773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,31,0.02683199942111969
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,31,0.025221332907676697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,31,0.027893332143624622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,31,0.02888533224662145
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,31,0.027434666951497395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,31,0.02700799951950709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,31,0.026863999664783478
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,31,0.026906666656335194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,63,0.02474133421977361
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,63,0.02794133375088374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,31,0.02699200063943863
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,63,0.025914666553338368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,63,0.026965332527955372
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,63,0.02621866762638092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,63,0.02588266630967458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,63,0.02585600068171819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,63,0.02442666639884313
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,63,0.027962667246659596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,63,0.026330667237440746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,63,0.025914666553338368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,63,0.02589866767326991
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,63,0.026842666169007618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,127,0.026373334228992462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,63,0.025807999074459076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,127,0.03262399882078171
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,127,0.03161066770553589
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,127,0.029882666965325672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,127,0.030218665798505146
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,127,0.029872000217437744
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,127,0.026346666117509205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,127,0.03107200066248576
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,127,0.030085332691669464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,127,0.03054933249950409
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,127,0.029685333371162415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,127,0.032655999064445496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,127,0.029733332494894665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,127,0.029648000995318096
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,255,0.03278400003910065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,255,0.03260799994071325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,255,0.03432533393303553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,255,0.03234666585922241
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,255,0.03155199935038885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,255,0.03265066693226496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,255,0.03162133445342382
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,255,0.03472000112136205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,255,0.03194666653871536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,255,0.03267733256022135
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,255,0.03180799881617228
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,255,0.03225066761175791
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,255,0.03146133323510488
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,255,0.031685332457224526
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,511,0.03764266769091288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,511,0.03510933369398117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,511,0.03363200028737386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,511,0.03402666747570038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,511,0.03455466777086258
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,511,0.03332799921433131
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,511,0.03389866650104523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,511,0.035088000198205314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,511,0.037445334096749626
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,511,0.03392533212900162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,511,0.03453866640726725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,511,0.03548266738653183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,511,0.033904001116752625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,1023,0.040287998815377556
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,1023,0.039893334110577904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,511,0.033376000821590424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,1023,0.04229333500067393
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,1023,0.03822399924198786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,1023,0.03871466716130575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,1023,0.040207999447981514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,1023,0.03822399924198786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,1023,0.04242666562398275
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,1023,0.03841066608826319
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,1023,0.03984533250331879
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,1023,0.0386559988061587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,1023,0.03809066613515218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,1023,0.03854399919509888
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,1023,0.03822399924198786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,2047,0.0513973335425059
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,2047,0.050527999798456825
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,2047,0.043824002146720886
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,2047,0.04573333263397217
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,2047,0.04387199878692627
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,2047,0.04979733129342397
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,2047,0.05146666864554087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,2047,0.043375998735427856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,2047,0.04611733555793762
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,2047,0.043280000487963356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,2047,0.04382933179537455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,2047,0.04321066538492838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,2047,0.04353600243727366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,2047,0.042917331059773765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,4095,0.05878399809201559
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,4095,0.056661332647005715
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,4095,0.056517332792282104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,4095,0.06482133269309998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,4095,0.05584533512592316
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,4095,0.06291733185450236
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,4095,0.064410666624705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,4095,0.0627040018637975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,4095,0.05864533285299937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,4095,0.057002668579419456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,4095,0.05593066910902659
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,4095,0.055626665552457176
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,4095,0.05648533503214518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,4095,0.05570133527119955
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,8191,0.0920853316783905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,8191,0.08189333478609721
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,8191,0.07829333345095317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,8191,0.0751200020313263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,8191,0.07406400144100189
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,8191,0.07314133147398631
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,8191,0.07321066657702129
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,8191,0.08256533245245616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,8191,0.07750933369000752
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,8191,0.07518933216730754
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,8191,0.09170666337013245
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,8191,0.07468800246715546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,8191,0.07343466579914093
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,8191,0.07326399783293407
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,16383,0.11589333415031433
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,16383,0.10999466975529988
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,16383,0.10840533177057902
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,16383,0.14591466387112936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,16383,0.10811733206113179
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,16383,0.10735999544461568
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,16383,0.10746133327484131
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,16383,0.146096001068751
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,16383,0.11474666992823283
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,16383,0.11020267009735107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,16383,0.10820266604423523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,16383,0.10686933000882466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,16383,0.10810133814811707
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,16383,0.1066986620426178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,32767,0.18532266219456991
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,32767,0.255023996035258
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,32767,0.180458664894104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,32767,0.17779199282328287
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,32767,0.17485332489013672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,32767,0.17427200078964233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,32767,0.17436800400416055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,32767,0.25462400913238525
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,32767,0.1868106722831726
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,32767,0.1768853267033895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,32767,0.1757919987042745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,32767,0.17413334051767984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,32767,0.17432000239690146
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,65535,0.4681973457336426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,32767,0.18005865812301636
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,65535,0.31962132453918457
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,65535,0.31277332703272503
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,65535,0.30931200583775836
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,65535,0.3089119990666707
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,65535,0.3078879912694295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,65535,0.30583999554316205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,65535,0.46805866559346515
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,65535,0.3088533282279968
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,65535,0.3081120053927104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,65535,0.3206079999605815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,65535,0.307258665561676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,65535,0.31354133288065594
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,65535,0.30641067028045654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,16,1,1,131071,0.8971892992655436
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,16,1,2,131071,0.5850559870402018
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,16,1,4,131071,0.5782986481984457
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,16,1,8,131071,0.5740319887797037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,16,1,32,131071,0.5723093350728353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,16,1,1,131071,0.898522694905599
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,16,1,64,131071,0.5695093472798666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,16,1,2,131071,0.583898663520813
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,16,1,16,131071,0.5730453332265218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,16,1,4,131071,0.5781759818394979
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,16,1,8,131071,0.5761866569519043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,16,1,16,131071,0.5714666843414307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,16,1,32,131071,0.5716426769892374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,1,0.033413333197434746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,16,1,64,131071,0.5711040099461874
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,1,0.029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,1,0.031093334158261616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,1,0.03068266560633977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,1,0.03148266673088074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,1,0.030426666140556335
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,1,0.030368000268936157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,1,0.029418667157491047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,1,0.03139200061559677
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,1,0.031045332551002502
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,1,0.03338133295377096
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,1,0.030645333230495453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,1,0.03019733230272929
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,3,0.02940266579389572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,1,0.030165334542592365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,3,0.03339733431736628
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,3,0.03125333289305369
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,3,0.030991998811562855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,3,0.030400000512599945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,3,0.030373332401116688
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,3,0.03027733415365219
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,3,0.029477333029111225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,3,0.031301334500312805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,3,0.030613332986831665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,3,0.030266667405764263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,3,0.03315199911594391
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,3,0.030602666238943737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,7,0.02916266769170761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,3,0.030192000170548756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,7,0.031285333136717476
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,7,0.03305600086847941
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,7,0.030389333764712017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,7,0.030133334298928578
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,7,0.030207999050617218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,7,0.03017599880695343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,7,0.029338667790095013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,7,0.03121600051720937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,7,0.030613332986831665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,7,0.030133334298928578
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,7,0.03299200038115183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,7,0.030085332691669464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,7,0.03028800090154012
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,15,0.03289599965016047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,15,0.029109333952267964
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,15,0.030858665704727173
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,15,0.03013866643110911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,15,0.029738667110602062
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,15,0.0296426663796107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,15,0.028991999725500744
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,15,0.029904000461101532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,15,0.03265066693226496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,15,0.03012799968322118
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,15,0.029872000217437744
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,15,0.029696000119050343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,15,0.030965333183606465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,15,0.029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,31,0.028581333657105763
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,31,0.03232000023126602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,31,0.030320001145203907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,31,0.029663999875386555
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,31,0.029029332101345062
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,31,0.02926933268706004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,31,0.02849599967400233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,31,0.0322026660044988
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,31,0.029343999922275543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,31,0.029733332494894665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,31,0.029296000798543293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,31,0.0306986669699351
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,31,0.029152000943819683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,31,0.02923733244339625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,63,0.027434666951497395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,63,0.03193599979082743
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,63,0.029509333272775013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,63,0.028666667640209198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,63,0.02815466622511546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,63,0.027978666126728058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,63,0.02831999957561493
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,63,0.027615999182065327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,63,0.03197333216667175
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,63,0.029552000264326733
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,63,0.028245332340399425
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,63,0.028010666370391846
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,63,0.028677334388097126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,63,0.02805333336194356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,127,0.03389333436886469
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,127,0.03134933362404505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,127,0.030799999833106995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,127,0.029477333029111225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,127,0.030320001145203907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,127,0.0303413321574529
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,127,0.029525332152843475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,127,0.030346666773160298
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,127,0.03408533334732056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,127,0.03072533259789149
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,127,0.03742400060097376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,127,0.030394665896892548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,127,0.0306986669699351
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,127,0.03025600065787633
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,255,0.03297599901755651
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,255,0.038106667498747505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,255,0.03677866607904434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,255,0.03739733248949051
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,255,0.03659733384847641
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,255,0.03306133300065994
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,255,0.04133866727352142
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,255,0.04130133241415024
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,255,0.03678400069475174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,255,0.038133333126703896
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,255,0.036858665446440377
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,255,0.03717333326737086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,255,0.036677333215872444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,255,0.036490666369597115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,511,0.04345066845417023
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,511,0.04665599763393402
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,511,0.03989866624275843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,511,0.0390133336186409
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,511,0.038176000118255615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,511,0.03873066604137421
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,511,0.03805333375930786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,511,0.046394666035970054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,511,0.04342933495839437
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,511,0.03975466638803482
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,511,0.038762666285037994
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,511,0.03866666555404663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,511,0.038149334490299225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,511,0.038831998904546104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,1023,0.05739733576774597
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,1023,0.053413331508636475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,1023,0.04711466530958811
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,1023,0.04598399996757507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,1023,0.044778664906819664
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,1023,0.044351999958356224
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,1023,0.05339199801286062
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,1023,0.04760533571243286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,1023,0.045066664616266884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,1023,0.058090666929880776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,1023,0.04457599918047587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,1023,0.04613333443800608
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,1023,0.04459733267625173
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,1023,0.04427200059096018
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,2047,0.06524799764156342
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,2047,0.0702453354994456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,2047,0.05669866502285004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,2047,0.06010133524735769
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,2047,0.057536001006762184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,2047,0.05669866502285004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,2047,0.05669333537419637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,2047,0.06537599861621857
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,2047,0.07021333277225494
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,2047,0.06000000238418579
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,2047,0.057706668972969055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,2047,0.05696000158786774
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,2047,0.05695466697216034
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,2047,0.056559999783833824
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,4095,0.09790933132171631
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,4095,0.084197332461675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,4095,0.07869866490364075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,4095,0.07605866591135661
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,4095,0.07472533484299977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,4095,0.07439466814200084
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,4095,0.07821866869926453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,4095,0.08381332953770955
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,4095,0.07390933235486348
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,4095,0.07502933343251546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,4095,0.07579733431339264
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,4095,0.09875200192133586
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,4095,0.07469866673151652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,4095,0.07422400017579396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,8191,0.15152000387509665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,8191,0.11852799852689107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,8191,0.11062933007876079
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,8191,0.1088266670703888
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,8191,0.11188800136248271
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,8191,0.10863999525705974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,8191,0.10807999968528748
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,8191,0.11783466736475627
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,8191,0.10943999886512756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,8191,0.10883200168609619
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,8191,0.15187199910481772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,8191,0.1116426686445872
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,16383,0.2606613238652547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,8191,0.10780266920725505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,8191,0.10809066891670227
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,16383,0.18544000387191772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,16383,0.17991467316945395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,16383,0.17665600776672363
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,16383,0.17543999354044595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,16383,0.17629865805308023
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,16383,0.17520533005396524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,16383,0.2609493335088094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,16383,0.17710934082667032
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,16383,0.17644266287485758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,16383,0.17605332533518472
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,16383,0.1794453263282776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,16383,0.17578667402267456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,16383,0.18568533658981323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,32767,0.4750080108642578
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,32767,0.3187626600265503
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,32767,0.30880532662073773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,32767,0.31174399455388385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,32767,0.3099840084711711
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,32767,0.3264480034510295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,32767,0.30823999643325806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,32767,0.4753386576970418
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,32767,0.3265226682027181
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,32767,0.3173546592394511
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,32767,0.3109440008799235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,32767,0.3095039923985799
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,32767,0.30949334303538006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,32767,0.30878933270772296
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,65535,0.903322696685791
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,65535,0.5814773241678873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,65535,0.5764426787694296
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,65535,0.59224534034729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,65535,0.5746080080668131
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,65535,0.5711679855982462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,65535,0.5724746783574423
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,65535,0.5903253157933553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,65535,0.9026186466217041
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,65535,0.582149346669515
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,65535,0.5760693152745565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,65535,0.5727786620457967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,65535,0.5716799894968668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,65535,0.5734346707661947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,32,1,1,131071,1.761797269185384
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,32,1,2,131071,1.1187626520792644
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,32,1,8,131071,1.1027626991271973
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,32,1,16,131071,1.0995893478393555
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,32,1,32,131071,1.0982346534729004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,32,1,4,131071,1.1079466342926025
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,32,1,64,131071,1.0974506537119548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,32,1,1,131071,1.7610294024149578
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,32,1,2,131071,1.1203146775563557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,32,1,4,131071,1.1093920071919758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,1,0.036474667489528656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,32,1,8,131071,1.1025866667429607
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,32,1,16,131071,1.1002933184305828
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,32,1,32,131071,1.1002399921417236
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,32,1,64,131071,1.0974559783935547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,1,0.03606933355331421
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,1,0.03899733225504557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,1,0.03514666606982549
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,1,0.034527999659379326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,1,0.03472000112136205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,1,0.034186666210492454
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,1,0.03601066768169403
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,1,0.03886933376391729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,1,0.036517334481080375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,1,0.035114665826161705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,1,0.03430933256944021
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,1,0.03458133339881897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,3,0.036303999523321785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,3,0.03621333340803782
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,3,0.03870933254559835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,3,0.0352906659245491
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,1,0.03467733412981033
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,3,0.03421333432197571
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,3,0.034304000437259674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,3,0.03453333427508672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,3,0.03871466716130575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,3,0.036415999134381614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,3,0.03611200054486593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,3,0.03526400029659271
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,3,0.03465600063403448
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,3,0.03425599883000056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,3,0.03449599941571554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,7,0.03841066608826319
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,7,0.03589333345492681
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,7,0.036159999668598175
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,7,0.034671999514102936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,7,0.034629332522551216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,7,0.0341386670867602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,7,0.03426666557788849
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,7,0.03624533365170161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,7,0.038362666964530945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,7,0.03605333218971888
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,7,0.034602666894594826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,7,0.03401066611210505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,7,0.03518400092919668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,7,0.03404266635576884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,15,0.03808533400297165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,15,0.03551999976237615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,15,0.034330666065216064
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,15,0.035589332381884255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,15,0.033674667278925575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,15,0.03425066669782003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,15,0.034048000971476235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,15,0.035962666074434914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,15,0.03809066613515218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,15,0.03544000039498011
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,15,0.03436800092458725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,15,0.03402133285999298
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,15,0.03370666752258936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,15,0.03403733422358831
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,31,0.03384533276160558
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,31,0.03788800040880839
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,31,0.03524799893299738
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,31,0.03314133236805598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,31,0.035429333647092186
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,31,0.033610666791598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,31,0.033013333876927696
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,31,0.0351946676770846
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,31,0.03789333254098892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,31,0.033743999898433685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,31,0.03499199946721395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,31,0.03331733246644338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,31,0.03332799921433131
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,63,0.03457066665093104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,31,0.03327466547489166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,63,0.034416000048319496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,63,0.0329120010137558
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,63,0.037248000502586365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,63,0.03246400008598963
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,63,0.03209066639343897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,63,0.032314665615558624
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,63,0.03451200077931086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,63,0.037434667348861694
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,63,0.03422933320204417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,63,0.03292799989382426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,63,0.03223466624816259
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,127,0.036677333215872444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,63,0.03207999964555105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,63,0.03244800120592117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,127,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,127,0.03499199946721395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,127,0.03440533330043157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,127,0.03629866739114126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,127,0.034490667283535004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,127,0.034858666360378265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,127,0.036677333215872444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,127,0.03915733347336451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,127,0.03619199991226196
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,127,0.03499199946721395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,127,0.0344106654326121
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,127,0.034448000291983284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,255,0.04025600105524063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,127,0.0345920001467069
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,255,0.042954668402671814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,255,0.038106667498747505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,255,0.03849600007136663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,255,0.038047999143600464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,255,0.04011200120051702
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,255,0.037978666524092354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,255,0.04035199930270513
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,255,0.04022933294375738
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,255,0.04293866455554962
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,255,0.038506666819254555
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,255,0.03796799977620443
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,255,0.03804266701141993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,255,0.03822933385769526
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,511,0.05390933156013489
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,511,0.060831998785336815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,511,0.05100266635417938
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,511,0.04849599798520406
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,511,0.048565333088239036
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,511,0.048026666045188904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,511,0.04975999891757965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,511,0.06118399898211161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,511,0.05355200171470642
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,511,0.05060799916585287
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,511,0.04894400139649709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,511,0.04783466458320618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,511,0.04863466819127401
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,511,0.049360002080599465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,1023,0.07509866853555043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,1023,0.06657066444555919
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,1023,0.06478933493296306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,1023,0.06131733457247416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,1023,0.06192533175150553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,1023,0.06081066528956095
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,1023,0.06047466893990835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,1023,0.07461333274841309
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,1023,0.06105599800745646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,1023,0.06625066697597504
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,1023,0.06515199939409892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,1023,0.062352001667022705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,1023,0.060517330964406334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,1023,0.06031466523806254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,2047,0.09222933650016785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,2047,0.09185600280761719
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,2047,0.08405333757400513
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,2047,0.08057066798210144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,2047,0.07853333155314128
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,2047,0.07838400204976399
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,2047,0.07876800000667572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,2047,0.09240000446637471
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,2047,0.0925546685854594
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,2047,0.0842186709245046
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,2047,0.07995733122030894
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,2047,0.07894933223724365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,2047,0.07799999912579854
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,4095,0.12638399998346964
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,4095,0.11443199714024861
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,4095,0.11986133456230164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,4095,0.1162453293800354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,4095,0.11272533734639485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,2047,0.0786293347676595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,4095,0.14819199840227762
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,4095,0.1134986678759257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,4095,0.14762133359909058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,4095,0.12633066376050314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,4095,0.11428800225257874
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,4095,0.11319999893506368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,4095,0.11924266815185547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,4095,0.11302399635314941
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,8191,0.2590666611989339
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,4095,0.11301866173744202
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,8191,0.1928159991900126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,8191,0.18039466937383017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,8191,0.18481600284576416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,8191,0.17870400349299112
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,8191,0.17870400349299112
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,8191,0.17874133586883545
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,8191,0.2592800060908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,8191,0.1786080002784729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,8191,0.18535999457041422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,8191,0.18026133378346762
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,8191,0.17850132783253989
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,8191,0.1932213306427002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,8191,0.17711466550827026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,16383,0.3399893442789714
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,16383,0.32646934191385907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,16383,0.4901653528213501
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,16383,0.3144373297691345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,16383,0.31781333684921265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,16383,0.3123786648114522
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,16383,0.49005866050720215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,16383,0.31224532922108966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,16383,0.3256373405456543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,16383,0.33954668045043945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,16383,0.3155946731567383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,16383,0.3134400049845378
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,16383,0.3182133237520854
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,16383,0.31084267298380536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,32767,0.5911306540171305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,32767,0.5840426683425903
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,32767,0.5769226551055908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,32767,0.9178986549377441
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,32767,0.5764266649881998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,32767,0.5757439931233724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,32767,0.9163573582967123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,32767,0.6067253351211548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,32767,0.6068960030873617
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,32767,0.5923893451690674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,32767,0.5820693174997965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,32767,0.5772106647491455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,32767,0.5791413386662801
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,32767,0.5749599933624268
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,65535,1.7715733846028645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,65535,1.1373973687489827
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,65535,1.1204906304677327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,65535,1.1105066935221355
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,65535,1.1072853406270344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,65535,1.1049226919809978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,65535,1.1017279624938965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,65535,1.771882692972819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,65535,1.1381066640218098
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,65535,1.1135199864705403
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,65535,1.1022453308105469
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,65535,1.1063733100891113
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,65535,1.1019519964853923
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,65535,1.1212159792582195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,64,1,1,131071,3.488821347554525
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,64,1,2,131071,2.187925338745117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,64,1,8,131071,2.160106658935547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,64,1,4,131071,2.16867733001709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,64,1,16,131071,2.154325326283773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,64,1,32,131071,2.1526506741841636
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,64,1,64,131071,2.1488320032755532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,64,1,1,131071,3.4891093571980796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,1,0.05301333467165629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,64,1,2,131071,2.1897013982137046
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,64,1,4,131071,2.1696267127990723
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,64,1,8,131071,2.157872041066488
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,64,1,16,131071,2.157002607981364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,1,0.04461866617202759
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,64,1,32,131071,2.1520214080810547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,1,0.04875733455022176
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,64,1,64,131071,2.1510507265726724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,1,0.0432586669921875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,1,0.041722665230433144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,1,0.042175998290379844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,1,0.0415040006240209
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,1,0.04877333343029022
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,1,0.044821331898371376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,1,0.05294933418432871
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,1,0.041850666205088295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,1,0.0415040006240209
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,1,0.04316799839337667
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,1,0.04185600082079569
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,3,0.04874666531880697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,3,0.044666667779286705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,3,0.04188799858093262
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,3,0.053472002347310386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,3,0.04161600023508072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,3,0.043023998538653054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,3,0.04127466678619385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,3,0.048469334840774536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,3,0.05286933481693268
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,3,0.04470400015513102
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,3,0.04288533329963684
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,3,0.041536000867684685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,3,0.04192000130812327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,7,0.04846400022506714
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,3,0.04163199911514918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,7,0.044639999667803444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,7,0.0529120018084844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,7,0.04174399872620901
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,7,0.04252799848715464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,7,0.04144000013669332
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,7,0.05228800078233083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,7,0.04851733148097992
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,7,0.04117333392302195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,7,0.04164800047874451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,7,0.0425546665986379
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,7,0.04470933477083842
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,7,0.04138666639725367
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,7,0.04124800115823746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,15,0.05192000170548757
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,15,0.04225599765777588
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,15,0.04828266799449921
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,15,0.0408693328499794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,15,0.04094400008519491
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,15,0.04131733377774557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,15,0.04398933549722036
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,15,0.05169600248336792
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,15,0.048138668139775596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,15,0.04412800073623657
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,15,0.04137066751718521
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,15,0.04222933451334635
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,15,0.04106666644414266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,31,0.047482664386431374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,15,0.0573226660490036
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,31,0.04371733466784159
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,31,0.041797334949175514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,31,0.05096533397833506
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,31,0.04074133435885111
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,31,0.040549332896868386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,31,0.040448000033696495
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,31,0.051445335149765015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,31,0.04165333261092504
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,31,0.04377600053946177
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,31,0.04048533240954081
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,31,0.0476746658484141
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,31,0.040821333726247154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,31,0.04029866556326548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,63,0.04295999805132548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,63,0.0410453329483668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,63,0.04721599817276001
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,63,0.05320000151793162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,63,0.040063999593257904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,63,0.03958400090535482
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,63,0.03941333293914795
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,63,0.04731733103593191
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,63,0.04294399917125702
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,63,0.04093866546948751
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,63,0.04004266609748205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,63,0.039493332306543984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,63,0.05332799752553304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,63,0.039461334546407066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,127,0.05903466542561849
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,127,0.04955733319123586
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,127,0.04491733511288961
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,127,0.0428959975639979
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,127,0.04160533348719279
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,127,0.04212800165017446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,127,0.04160533348719279
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,127,0.059119999408721924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,127,0.04929600159327189
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,127,0.044954667488733925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,127,0.042949333786964417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,127,0.042208001017570496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,127,0.041690667470296226
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,127,0.04164266586303711
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,255,0.05829866727193197
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,255,0.04727466901143392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,255,0.05022933085759481
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,255,0.04595200220743815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,255,0.045509333411852516
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,255,0.06702933212121327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,255,0.04510400195916494
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,255,0.06651199857393901
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,255,0.05005866785844167
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,255,0.047925333182017006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,255,0.04595200220743815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,255,0.0569653312365214
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,511,0.08015466729799907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,255,0.045509333411852516
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,255,0.045519997676213585
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,511,0.0634933312733968
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,511,0.06075733403364817
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,511,0.05955733358860016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,511,0.06818666557470958
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,511,0.05931733548641205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,511,0.05869866907596588
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,511,0.06306666632493337
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,511,0.08052266637484233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,511,0.06806933383146922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,511,0.060640002290407814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,511,0.05949866771697998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,511,0.05927466849486033
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,511,0.05903466542561849
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,1023,0.10776533683141072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,1023,0.08689066767692566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,1023,0.0783733328183492
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,1023,0.0763733337322871
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,1023,0.07701333363850911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,1023,0.08105599880218506
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,1023,0.07658666869004567
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,1023,0.10795733332633972
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,1023,0.08668800195058186
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,1023,0.07801066835721333
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,1023,0.08102400104204814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,1023,0.07683200140794118
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,1023,0.07660800218582153
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,1023,0.07644799848397572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,2047,0.1199679970741272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,2047,0.16382933656374613
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,2047,0.11533332864443462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,2047,0.11217600107192993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,2047,0.11129066348075867
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,2047,0.11185066898663838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,2047,0.1102186640103658
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,2047,0.12025066216786702
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,2047,0.1123466690381368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,2047,0.16420267025629678
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,2047,0.11528533697128296
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,2047,0.11173333724339803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,2047,0.1106773316860199
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,2047,0.11089600125948589
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,4095,0.27355732520421344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,4095,0.1840160091718038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,4095,0.18816532691319784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,4095,0.17874133586883545
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,4095,0.1793760061264038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,4095,0.18046400944391885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,4095,0.27428267399470013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,4095,0.1774186690648397
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,4095,0.1888266603151957
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,4095,0.18278932571411133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,4095,0.18002132574717203
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,4095,0.1792479952176412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,4095,0.1787733236948649
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,4095,0.178874671459198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,8191,0.4986613194147746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,8191,0.3216746648152669
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,8191,0.31457600990931195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,8191,0.3126773238182068
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,8191,0.3107093373934428
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,8191,0.31011199951171875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,8191,0.31009600559870404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,8191,0.3208853403727214
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,8191,0.31218665838241577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,8191,0.31512532631556195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,8191,0.4988693396250407
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,8191,0.3130026658376058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,8191,0.3103253245353699
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,8191,0.30955199400583905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,16383,0.9301226933797201
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,16383,0.6330026785532633
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,16383,0.6081813176472982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,16383,0.5820266803105673
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,16383,0.5825493335723877
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,16383,0.5865973234176636
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,16383,0.5945440133412679
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,16383,0.9273227055867513
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,16383,0.5961120128631592
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,16383,0.6090240081151327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,16383,0.5870720148086548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,16383,0.6334666808446249
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,16383,0.5834879875183105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,16383,0.5816426674524943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,32767,1.1675999959309895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,32767,1.783141295115153
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,32767,1.1378080050150554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,32767,1.1102933088938396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,32767,1.1218079725901287
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,32767,1.10480531056722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,32767,1.107034683227539
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,32767,1.7839573224385579
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,32767,1.1377333005269368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,32767,1.1641173362731934
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,32767,1.1211466789245605
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,32767,1.1112053394317627
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,32767,1.107690652211507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,32767,1.1040053367614746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,65535,3.5022080739339194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,65535,2.1745386123657227
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,65535,2.163306713104248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,65535,2.1923680305480957
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,65535,2.222965399424235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,65535,2.1601120630900064
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,65535,2.157973289489746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,65535,3.4945332209269204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,65535,2.21888001759847
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,65535,2.1899627049764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,65535,2.1641546885172525
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,65535,2.1764586766560874
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,65535,2.1614294052124023
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,65535,2.1577173868815103
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,128,1,1,131071,6.921248118082683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,128,1,32,131071,4.258933385213216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,128,1,16,131071,4.258517265319824
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,128,1,64,131071,4.249605178833008
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,128,1,8,131071,4.268805185953776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,128,1,2,131071,4.318570772806804
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,128,1,4,131071,4.28656005859375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,128,1,1,131071,6.923850377400716
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,1,0.0688266654809316
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,1,0.09056533376375835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,1,0.061424002051353455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,1,0.05780800183614095
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,1,0.056261335810025535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,128,1,8,131071,4.26910400390625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,1,0.055173332492510475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,1,0.054976001381874084
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,128,1,64,131071,4.251935958862305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,128,1,2,131071,4.319429397583008
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,128,1,32,131071,4.254922548929851
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,1,0.09040533502896626
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,128,1,4,131071,4.28712526957194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,128,1,16,131071,4.260608037312825
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,1,0.06868800024191539
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,1,0.06121066709359487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,1,0.05508266886075338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,1,0.055813332398732506
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,1,0.05765866736570994
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,1,0.05496533215045929
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,3,0.06118399898211161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,3,0.09001066287358601
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,3,0.0688213308652242
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,3,0.05764266848564148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,3,0.055888002117474876
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,3,0.05486399928728739
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,3,0.05493866900602976
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,3,0.0897226631641388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,3,0.061039999127388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,3,0.06843733290831248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,3,0.05750933289527893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,3,0.05508266886075338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,3,0.054586668809254967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,3,0.055973331133524575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,7,0.06840533514817555
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,7,0.060789331793785095
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,7,0.05479466418425242
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,7,0.08938133716583252
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,7,0.05707733333110809
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,7,0.05428266525268555
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,7,0.05560533205668131
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,7,0.05712000032265981
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,7,0.06076799829800924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,7,0.06870933373769124
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,7,0.08928533395131429
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,7,0.05490666627883911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,7,0.05569066603978475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,15,0.08896533648173015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,7,0.05420266588528951
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,15,0.05975466469923655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,15,0.056314667065938316
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,15,0.0679253339767456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,15,0.05469333132108053
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,15,0.05401599903901418
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,15,0.0536053329706192
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,15,0.06760000189145406
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,15,0.08861333131790161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,15,0.05958933134873708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,15,0.05460800230503082
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,15,0.05619733532269796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,15,0.05363733569780985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,15,0.05398400127887726
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,31,0.05875200033187866
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,31,0.06818133095900218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,31,0.053616002202034
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,31,0.05535466472307841
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,31,0.05292266607284546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,31,0.08898133039474487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,31,0.052602668603261314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,31,0.05885333319505056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,31,0.06879466772079468
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,31,0.05500799914201101
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,31,0.08910399675369263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,31,0.05369600156943003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,31,0.052746668457984924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,63,0.09090133508046468
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,31,0.05263466636339823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,63,0.05357866485913595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,63,0.05782400071620941
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,63,0.07241066793600719
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,63,0.051088000337282814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,63,0.0518453319867452
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,63,0.05068266888459524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,63,0.09059733152389526
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,63,0.05804799993832906
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,63,0.05357866485913595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,63,0.051669334371884666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,63,0.050981332858403526
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,63,0.07281599938869476
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,63,0.05053866902987162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,127,0.09797333677609761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,127,0.06734933455785115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,127,0.060453335444132485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,127,0.07765866816043854
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,127,0.05682666599750519
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,127,0.05514133473237356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,127,0.05494399865468343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,127,0.07776000102361043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,127,0.06742933392524719
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,127,0.06019733349482218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,127,0.056645333766937256
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,127,0.055311997731526695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,127,0.0981226662794749
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,127,0.05457599957784017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,255,0.08687999844551086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,255,0.07677866518497467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,255,0.07201600074768066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,255,0.06975466509660085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,255,0.06859200199445088
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,255,0.06818666557470958
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,255,0.11097600062688191
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,255,0.07680533329645793
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,255,0.07203199962774913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,255,0.08684800068537395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,255,0.0697920024394989
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,255,0.06861866513888042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,255,0.11150933305422465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,255,0.06832000116507213
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,511,0.09550933043162028
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,511,0.1388213336467743
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,511,0.08987200260162354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,511,0.10595200459162395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,511,0.08809066812197368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,511,0.08633599678675334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,511,0.08703466256459554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,511,0.13889066378275552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,511,0.08785600463549297
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,511,0.10628267129262288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,511,0.09074667096138
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,511,0.08749333024024963
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,511,0.08610666791598003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,511,0.09548266728719075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,1023,0.19387733936309814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,1023,0.1305333375930786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,1023,0.12408000230789185
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,1023,0.12164266904195149
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,1023,0.12083199620246887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,1023,0.1404906709988912
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,1023,0.11987200379371643
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,1023,0.14065600434939066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,1023,0.19403199354807535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,1023,0.13060800234476724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,1023,0.1220853328704834
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,1023,0.12075733145078023
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,1023,0.12437333663304646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,2047,0.207914670308431
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,1023,0.11953066786130269
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,2047,0.3056640028953552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,2047,0.1985493302345276
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,2047,0.1919040083885193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,2047,0.18982932964960733
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,2047,0.18910932540893555
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,2047,0.18721065918604532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,2047,0.18875199556350708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,2047,0.19690134127934775
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,2047,0.3042186697324117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,2047,0.19196800390879312
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,2047,0.18775999546051025
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,2047,0.1870186726252238
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,2047,0.20780799786249796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,4095,0.5216853221257528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,4095,0.3408426841100057
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,4095,0.3320533235867818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,4095,0.3246399958928426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,4095,0.32293333609898883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,4095,0.321669340133667
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,4095,0.32101333141326904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,4095,0.33977067470550537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,4095,0.5237919886906942
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,4095,0.3313173254330953
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,4095,0.3269173304239909
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,4095,0.32230399052302044
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,4095,0.3209279974301656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,4095,0.3213866750399272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,8191,0.9502346515655518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,8191,0.6481920083363851
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,8191,0.6066506703694662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,8191,0.5954613288243612
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,8191,0.5924746592839559
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,8191,0.5875999927520752
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,8191,0.6239999930063883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,8191,0.9523146947224935
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,8191,0.6485333442687988
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,8191,0.6219946543375651
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,8191,0.6057493289311727
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,8191,0.5967413187026978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,8191,0.5922880172729492
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,8191,0.5874826510747274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,16383,1.8051306406656902
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,16383,1.1857173442840576
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,16383,1.1546346346537273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,16383,1.1351093451182048
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,16383,1.1232906977335613
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,16383,1.1199733416239421
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,16383,1.115104039510091
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,16383,1.8135147094726562
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,16383,1.1358880201975505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,16383,1.1545920372009277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,16383,1.186245361963908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,16383,1.1242187023162842
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,16383,1.117077350616455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,16383,1.1141760349273682
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,32767,3.518207867940267
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,32767,2.247333367665609
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,32767,2.210341294606527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,32767,2.187274614969889
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,32767,2.1705546379089355
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,32767,2.163818677266439
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,32767,2.1731200218200684
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,32767,3.520458539326986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,32767,2.210890611012777
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,32767,2.247002601623535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,32767,2.1889120737711587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,32767,2.1671573321024575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,32767,2.1661972999572754
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,32767,2.1778613726298013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,256,1,2,65535,4.3508907953898115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,256,1,8,65535,4.289893468221028
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,256,1,4,65535,4.309434572855632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,256,1,16,65535,4.27840518951416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,256,1,64,65535,4.263584136962891
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,256,1,32,65535,4.26686414082845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,256,1,1,65535,6.947381337483724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,256,1,1,65535,6.944906870524089
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,1,0.15858667095502219
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,1,0.11222933729489644
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,1,0.09481599926948547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,1,0.08720533053080241
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,1,0.08249066770076752
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,1,0.08392533659934998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,256,1,2,65535,4.35207462310791
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,1,0.08190399905045827
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,1,0.15833066900571188
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,1,0.11209066708882649
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,1,0.09481599926948547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,256,1,16,65535,4.271914800008138
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,1,0.08719467123349507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,256,1,8,65535,4.292618751525879
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,1,0.08385066191355388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,256,1,64,65535,4.264650662740071
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,1,0.08246399958928426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,256,1,4,65535,4.31335989634196
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,256,1,32,65535,4.259685198465983
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,1,0.08195200065771739
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,3,0.09426132837931316
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,3,0.11091732978820801
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,3,0.0867680013179779
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,3,0.15625066558519998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,3,0.08335999647776286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,3,0.081535999973615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,3,0.08218666911125183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,3,0.09454933802286784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,3,0.08661333719889323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,3,0.11101866761843364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,3,0.08170666793982188
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,3,0.08341333270072937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,3,0.081386665503184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,3,0.1558133363723755
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,7,0.15520532925923666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,7,0.0860693355401357
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,7,0.09407466650009155
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,7,0.0825493335723877
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,7,0.08081600069999695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,7,0.081386665503184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,7,0.11035199960072835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,7,0.15424000223477682
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,7,0.08634133140246074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,7,0.09411733349164327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,7,0.08239466448624928
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,7,0.11079999804496765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,7,0.08135466774304707
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,7,0.08075200021266937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,15,0.15390933553377786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,15,0.08065600196520488
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,15,0.08482666810353597
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,15,0.09324799974759419
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,15,0.11061867078145345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,15,0.07982933521270752
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,15,0.07874133189519246
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,15,0.07940266529719035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,15,0.09311466415723164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,15,0.15435199936230978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,15,0.11025066177050273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,15,0.07894399762153625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,15,0.08474666873613994
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,15,0.08124266564846039
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,31,0.15177067120869955
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,31,0.09261332949002583
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,31,0.07896533111731212
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,31,0.0825973351796468
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,31,0.11104533076286316
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,31,0.07683200140794118
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,31,0.07762133578459422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,31,0.1521440049012502
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,31,0.09230400125185649
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,31,0.07903466622034709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,31,0.0830026666323344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,31,0.07745066781838734
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,31,0.07694399853547414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,31,0.11040533582369487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,63,0.15607466300328574
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,63,0.1141919990380605
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,63,0.09537067015965779
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,63,0.08573333422342937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,63,0.0751093327999115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,63,0.07854933540026347
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,63,0.07295999924341838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,63,0.15541332960128784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,63,0.11452266573905945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,63,0.08567999800046285
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,63,0.09512533744176228
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,63,0.07835733393828075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,63,0.0745600014925003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,63,0.07249066730340321
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,127,0.17062934239705405
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,127,0.09447466333707173
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,127,0.10408000151316325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,127,0.08703466256459554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,127,0.08965333302815755
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,127,0.1230506698290507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,127,0.087226668993632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,127,0.17103999853134155
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,127,0.10354133447011311
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,127,0.08797333637873332
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,127,0.09451733032862346
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,127,0.08700266480445862
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,127,0.12265066305796306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,127,0.08987200260162354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,255,0.1976906657218933
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,255,0.14099733034769693
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,255,0.10876799623171489
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,255,0.1118986705938975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,255,0.10550399621327718
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,255,0.12173333764076233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,255,0.10470400253931682
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,255,0.1970133384068807
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,255,0.11265599727630615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,255,0.12153599659601848
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,255,0.10598933696746826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,255,0.10801600416501363
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,255,0.14097066720326742
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,255,0.10436800122261047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,511,0.17751999696095785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,511,0.25334399938583374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,511,0.1574133336544037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,511,0.14282666643460593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,511,0.14808533589045206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,511,0.1393066644668579
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,511,0.141077329715093
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,511,0.250602662563324
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,511,0.14288000265757242
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,511,0.15742400288581848
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,511,0.14829333623250326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,511,0.14064000050226846
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,511,0.178165336449941
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,511,0.139573335647583
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,1023,0.36403199036916095
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,1023,0.21105599403381348
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,1023,0.21641600131988525
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,1023,0.20664000511169434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,1023,0.2255786657333374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,1023,0.24689066410064697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,1023,0.20901866753896078
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,1023,0.24779200553894043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,1023,0.21619200706481934
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,1023,0.21065066258112589
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,1023,0.22609599431355795
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,1023,0.20834134022394815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,1023,0.20542399088541666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,1023,0.3649173180262248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,2047,0.3439519802729289
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,2047,0.3580160140991211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,2047,0.34779731432596844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,2047,0.5779786507288615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,2047,0.33955200513203937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,2047,0.3397013346354167
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,2047,0.3803306818008423
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,2047,0.35862934589385986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,2047,0.34727466106414795
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,2047,0.339408000310262
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,2047,0.342682679494222
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,2047,0.34098132451375324
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,2047,0.37860266367594403
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,2047,0.5767199993133545
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,4095,1.0040640036265056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,4095,0.6254133383433024
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,4095,0.6815306345621744
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,4095,0.6482933362325033
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,4095,0.6139520009358724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,4095,0.6038506825764974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,4095,0.6071840127309164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,4095,1.0063199996948242
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,4095,0.6813279787699381
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,4095,0.6292426586151123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,4095,0.6160693168640137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,4095,0.6478240092595419
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,4095,0.6030933459599813
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,4095,0.6079733371734619
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,8191,1.8548693656921387
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,8191,1.2321866353352864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,8191,1.1863199869791667
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,8191,1.1622560024261475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,8191,1.1451786359151204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,8191,1.1370986302693684
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,8191,1.1316426595052083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,8191,1.862064043680827
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,8191,1.186479965845744
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,8191,1.2320746580759685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,8191,1.161402702331543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,8191,1.1362133026123047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,8191,1.1453333695729573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,8191,1.132314682006836
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,16383,3.5692052841186523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,16383,2.291813373565674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,16383,2.2164053916931152
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,16383,2.247045358022054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,16383,2.197546641031901
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,16383,2.188543955485026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,16383,2.184783935546875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,16383,3.566399892171224
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,16383,2.290272076924642
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,16383,2.2474986712137857
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,16383,2.2160746256510415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,16383,2.1976265907287598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,16383,2.1887200673421225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,16383,2.1839680671691895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,512,1,1,32767,7.010266621907552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,512,1,2,32767,4.406245231628418
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,512,1,4,32767,4.352815945943196
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,512,1,16,32767,4.29746659596761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,512,1,64,32767,4.276853243509929
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,512,1,32,32767,4.285392125447591
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,512,1,8,32767,4.3198239008585615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,512,1,1,32767,6.990304311116536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,1,0.19150400161743164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,1,0.2945866584777832
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,1,0.1618613302707672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,1,0.1479680041472117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,1,0.1393226683139801
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,1,0.13711466391881308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,1,0.13518933455149332
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,1,0.19158399105072021
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,1,0.29468266169230145
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,512,1,4,32767,4.354133288065593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,512,1,2,32767,4.409909248352051
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,1,0.14781333009401956
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,1,0.16216533382733664
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,1,0.13562132914861044
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,1,0.13962133725484213
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,1,0.13733866810798645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,3,0.1460479994614919
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,3,0.288592000802358
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,3,0.160261332988739
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,3,0.18953599532445273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,3,0.13784000277519226
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,3,0.13480533162752786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,3,0.1334986686706543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,512,1,8,32767,4.321621259053548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,3,0.14594667156537375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,3,0.16030933459599814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,3,0.18969066937764487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,512,1,16,32767,4.300042788187663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,3,0.2892959912618001
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,3,0.13313600420951843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,512,1,64,32767,4.281717300415039
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,3,0.1351040005683899
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,3,0.13778666655222574
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,512,1,32,32767,4.288874626159668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,7,0.1363040010134379
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,7,0.14484266440073648
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,7,0.15878400206565857
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,7,0.133050670226415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,7,0.1888266603151957
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,7,0.1318880021572113
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,7,0.28622933228810626
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,7,0.14476266503334045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,7,0.15902400016784668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,7,0.135754664738973
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,7,0.1332533359527588
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,7,0.18881599108378092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,7,0.13169067104657492
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,7,0.28622400760650635
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,15,0.14284800489743552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,15,0.1343839963277181
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,15,0.1564906636873881
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,15,0.1307199994723002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,15,0.18757865826288858
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,15,0.2852213382720947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,15,0.1288586656252543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,15,0.14244799812634787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,15,0.1344586710135142
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,15,0.13030933340390524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,15,0.15710399548212686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,15,0.12921067078908285
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,15,0.18780799706776938
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,15,0.2842666705449422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,31,0.13874133427937826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,31,0.13129066427548727
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,31,0.154341330130895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,31,0.12777066230773926
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,31,0.125791996717453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,31,0.18583999077479044
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,31,0.1390613317489624
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,31,0.13102400302886963
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,31,0.12775466839472452
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,31,0.15416533748308817
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,31,0.27847999334335327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,31,0.12586133678754172
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,31,0.18614399433135986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,31,0.2794133424758911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,63,0.1383039951324463
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,63,0.12548266847928366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,63,0.1237440009911855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,63,0.1550986667474111
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,63,0.1295093297958374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,63,0.19021334250768027
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,63,0.2834666570027669
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,63,0.12955199678738913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,63,0.1388106644153595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,63,0.15571733315785727
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,63,0.12372799714406331
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,63,0.282970666885376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,63,0.18863999843597412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,63,0.1256106694539388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,127,0.17305066188176474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,127,0.1458560029665629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,127,0.1544373333454132
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,127,0.14109866817792258
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,127,0.13959999879201254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,127,0.20878932873408
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,127,0.3147306640942891
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,127,0.17270400126775107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,127,0.1540426711241404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,127,0.14545599619547525
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,127,0.14174933234850565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,127,0.14031466841697693
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,127,0.2084746758143107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,127,0.3125706712404887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,255,0.188591996828715
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,255,0.17458667357762656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,255,0.20576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,255,0.17548799514770508
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,255,0.24321067333221436
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,255,0.3668586810429891
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,255,0.18082133928934732
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,255,0.20689600706100464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,255,0.18921067317326865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,255,0.1802826722462972
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,255,0.24331732590993246
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,255,0.17633066574732462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,255,0.1749173402786255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,255,0.367301344871521
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,511,0.2600746750831604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,511,0.2496053377787272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,511,0.2784159978230794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,511,0.24503467480341592
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,511,0.24222934246063232
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,511,0.3174399932225545
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,511,0.47304534912109375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,511,0.2457866668701172
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,511,0.24235200881958008
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,511,0.25968533754348755
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,511,0.24979199965794882
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,511,0.2782026727994283
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,511,0.31732267141342163
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,511,0.47204800446828205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,1023,0.41340800126393634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,1023,0.3842346668243408
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,1023,0.39452799161275226
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,1023,0.6852266788482666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,1023,0.37863465150197345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,1023,0.37485333283742267
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,1023,0.45588799317677814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,1023,0.3855466842651367
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,1023,0.395087997118632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,1023,0.37916799386342365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,1023,0.3757546742757161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,1023,0.4137333234151204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,1023,0.45601598421732586
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,1023,0.6848693688710531
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,2047,1.1176746686299641
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,2047,0.640714685122172
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,2047,0.6696053345998129
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,2047,0.6513813336690267
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,2047,0.7008106708526611
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,2047,0.6362346808115641
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,2047,0.7465493679046631
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,2047,0.6701066493988037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,2047,1.1144639650980632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,2047,0.6528480052947998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,2047,0.6353653271993002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,2047,0.6996213595072428
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,2047,0.7449706395467123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,2047,0.6437600056330363
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,4095,1.9703787167867024
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,4095,1.1737279891967773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,4095,1.1640373071034749
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,4095,1.1891199747721355
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,4095,1.2111999988555908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,4095,1.3090453147888184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,4095,1.250810702641805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,4095,1.9671732584635417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,4095,1.1887093385060628
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,4095,1.2498453458150227
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,4095,1.2111732959747314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,4095,1.1709173520406086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,4095,1.3110400040944417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,4095,1.1631893316904705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,8191,2.3961332639058432
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,8191,3.686533292134603
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,8191,2.2703839937845864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,8191,2.2418346405029297
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,8191,2.3188533782958984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,8191,2.21507199605306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,8191,2.225605328877767
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,8191,3.671840031941732
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,8191,2.3928586641947427
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,8191,2.3165653546651206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,8191,2.2752052942911782
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,8191,2.244906743367513
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,8191,2.226655960083008
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,8191,2.2156052589416504
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,128,1024,1,1,16383,7.09275754292806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,64,1024,1,2,16383,4.5218079884847
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,32,1024,1,4,16383,4.434560139973958
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,16,1024,1,8,16383,4.377274513244629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,8,1024,1,16,16383,4.347546577453613
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,4,1024,1,32,16383,4.326149304707845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,fp8,2,1024,1,64,16383,4.31546147664388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,128,1024,1,1,16383,7.107509613037109
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,32,1024,1,4,16383,4.428218523661296
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,64,1024,1,2,16383,4.519770622253418
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,16,1024,1,8,16383,4.375845273335774
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,8,1024,1,16,16383,4.345509211222331
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,2,1024,1,64,16383,4.313482602437337
TRTLLM,1.0.0rc3,NVIDIA H200,mla_generation,default,float16,float16,4,1024,1,32,16383,4.328207969665527
