framework,version,device,op_name,kernel_source,bmm_dtype,num_tokens,num_heads,latency
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1,128,0.006028800085186958
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1,64,0.0048640001565217975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1,128,0.01557759940624237
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1,32,0.004416000097990036
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1,64,0.01053439974784851
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1,16,0.004102399945259095
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1,32,0.00806720033288002
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1,8,0.003958399966359138
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1,16,0.008035200089216233
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1,4,0.007292799651622772
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1,8,0.007964800298213958
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1,2,0.004742399975657463
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1,1,0.003936000168323517
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1,4,0.006563200056552887
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2,128,0.0060479998588562015
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1,1,0.006172800064086914
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2,64,0.004806400090456009
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2,128,0.01674560010433197
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2,32,0.007280000299215317
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1,2,0.005702399834990502
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2,16,0.0060479998588562015
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2,64,0.011046399921178817
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2,8,0.005606399849057198
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2,2,0.004783999919891357
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2,1,0.005417599901556968
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2,32,0.00838399976491928
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2,1,0.008185599744319916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4,128,0.01653759926557541
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2,4,0.004611200094223023
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4,128,0.006006399914622307
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4,64,0.004870399832725525
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4,64,0.0115167997777462
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4,32,0.004067200049757957
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2,16,0.00692799985408783
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4,8,0.005142400041222572
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2,4,0.006195199862122536
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2,8,0.006377600133419037
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2,2,0.00589120015501976
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4,16,0.004009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4,16,0.011193600296974183
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4,4,0.009916800260543823
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4,4,0.006297600269317627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4,2,0.005923200026154518
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4,1,0.005638400092720985
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4,2,0.004441599920392036
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8,128,0.005632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4,32,0.00825600028038025
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8,64,0.01112319976091385
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8,32,0.004281599819660187
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8,64,0.004732799902558327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8,8,0.00416640006005764
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8,16,0.005702399834990502
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4,8,0.006291200220584869
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4,1,0.005974400043487549
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8,128,0.01621759980916977
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8,4,0.006351999938488007
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8,32,0.008217599987983704
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8,2,0.005772799998521805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8,16,0.00719040036201477
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,16,128,0.0058143999427556995
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8,1,0.004185599833726883
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8,8,0.006672000139951706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,16,64,0.004774399846792221
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,16,32,0.004252799972891808
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8,2,0.006332799792289734
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,16,16,0.0038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8,4,0.0071552000939846035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,16,8,0.004239999875426293
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8,1,0.005936000123620033
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,16,8,0.006825599819421768
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,16,2,0.004950400069355964
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,16,4,0.005846399813890457
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,16,64,0.011539199948310852
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,16,4,0.006527999788522721
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,16,128,0.01727039963006973
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,16,32,0.008604799956083297
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,16,1,0.005564799904823304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,16,16,0.007648000121116638
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,32,128,0.006220800057053566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,32,128,0.01936960071325302
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,32,64,0.005609599873423576
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,16,2,0.006239999830722809
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,32,8,0.005488000065088272
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,32,32,0.004668800160288811
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,16,1,0.006297600269317627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,32,16,0.0045471999794244765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,32,16,0.007884799689054488
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,32,8,0.007036799937486649
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,32,1,0.00398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,32,4,0.004310400038957596
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,32,64,0.012432000041007996
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,32,2,0.0038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,32,4,0.007161600142717361
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,32,32,0.008966399729251862
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,32,1,0.006255999952554703
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,48,128,0.006876800209283829
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,48,128,0.02107519954442978
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,48,64,0.005615999922156334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,48,32,0.0047680001705884935
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,32,2,0.006815999746322632
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,48,32,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,48,64,0.012995199859142303
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,48,16,0.004137599840760231
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,48,16,0.008489599823951722
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,48,8,0.005657599866390228
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,48,1,0.003974400088191032
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,48,8,0.007788799703121185
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,48,4,0.004224000126123428
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,48,4,0.007580800354480744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,48,2,0.004412800073623657
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,48,2,0.006854400038719177
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,48,1,0.0067391999065876005
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,64,128,0.007612799853086471
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,64,64,0.005881600081920624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,64,64,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,64,32,0.004729599878191948
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,64,32,0.00981760025024414
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,64,16,0.004233599826693535
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,64,16,0.008470399677753449
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,64,8,0.004211200028657913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,64,128,0.022096000611782074
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,64,8,0.007916799932718276
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,64,4,0.004412800073623657
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,64,4,0.007545600086450577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,64,2,0.003891199827194214
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,64,2,0.0072672002017498015
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,64,1,0.003974400088191032
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,64,1,0.01125440001487732
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,80,128,0.008224000036716462
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,80,64,0.00615679994225502
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,80,32,0.005033599957823753
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,80,16,0.00469760000705719
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,80,8,0.004307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,80,4,0.003961599990725517
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,80,2,0.004208000004291534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,80,1,0.004700800031423568
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,80,128,0.024854399263858795
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,96,128,0.008745600283145905
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,96,128,0.025833600759506227
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,96,64,0.006431999802589417
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,80,64,0.015404799580574035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,96,32,0.005100800096988678
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,80,16,0.008777599781751633
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,96,16,0.004560000076889992
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,80,8,0.00772159993648529
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,96,16,0.00974079966545105
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,96,8,0.004735999926924706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,80,1,0.006934399902820587
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,96,4,0.0041600000113248825
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,96,2,0.004108799993991852
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,80,4,0.007491199672222138
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,96,64,0.015820799767971037
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,80,2,0.00703359991312027
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,96,1,0.004816000163555145
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,128,128,0.00992320030927658
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,128,64,0.007302399724721909
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,96,32,0.010543999820947647
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,128,32,0.005244800075888634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,80,32,0.010742399841547012
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,128,32,0.011308799684047698
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,96,8,0.008009599894285202
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,128,16,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,96,4,0.00769599974155426
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,128,8,0.004342399910092354
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,96,2,0.007116799801588058
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,128,4,0.004732799902558327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,96,1,0.006911999732255936
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,128,2,0.004108799993991852
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,128,1,0.006015999987721443
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,128,128,0.028278398513793945
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,160,128,0.011840000003576278
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,128,64,0.016223999857902526
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,160,128,0.04087679982185364
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,160,64,0.007942400127649307
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,160,64,0.022595199942588805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,128,16,0.00897279977798462
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,160,32,0.0056639999151229855
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,160,16,0.0046720001846551895
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,128,4,0.00785600021481514
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,128,2,0.007462400197982788
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,128,1,0.006956800073385239
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,160,8,0.0042975999414920805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,160,4,0.004182400181889534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,128,8,0.008454400300979614
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,160,2,0.00416640006005764
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,160,32,0.014368000626564025
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,160,1,0.004086399823427201
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,192,128,0.016233600676059723
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,192,128,0.044758400321006774
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,192,64,0.00814720019698143
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,192,32,0.006073600053787232
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,192,16,0.004915200173854828
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,160,16,0.009984000027179718
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,160,2,0.007440000027418137
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,192,8,0.0043136000633239744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,192,4,0.004143999889492989
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,160,8,0.008499199897050858
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,192,2,0.003990399837493897
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,160,4,0.007788799703121185
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,192,2,0.008416000008583068
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,192,1,0.004131200164556504
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,192,1,0.007398399710655213
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,192,64,0.02351039946079254
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,160,1,0.00735040009021759
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,256,128,0.021929599344730377
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,192,32,0.015107199549674988
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,256,64,0.009433600306510925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,256,32,0.006889600306749344
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,192,16,0.010038399696350097
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,256,32,0.017836800217628478
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,256,16,0.005270399898290634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,256,16,0.011379200220108032
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,192,8,0.008662399649620057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,256,8,0.00724480003118515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,192,4,0.007766400277614593
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,256,4,0.004335999861359597
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,256,2,0.003945599868893623
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,256,1,0.004278400167822838
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,256,1,0.009254399687051773
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,320,128,0.024137599766254424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,320,64,0.011875200271606445
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,256,128,0.048809599876403806
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,320,64,0.035497599840164186
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,320,32,0.007056000083684922
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,320,32,0.019920000433921815
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,256,64,0.02588160037994385
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,320,16,0.009737599641084671
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,320,8,0.004800000041723251
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,320,4,0.004208000004291534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,320,2,0.0039680000394582745
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,256,8,0.008755200356245042
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,320,1,0.003961599990725517
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,256,4,0.0077632002532482145
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,384,128,0.027606400847434997
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,256,2,0.007552000135183335
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,384,128,0.06857280135154724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,384,64,0.014419199526309967
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,384,32,0.008137600123882293
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,320,128,0.06692159771919251
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,384,32,0.02056639939546585
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,384,16,0.005907199904322624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,384,8,0.0047520000487566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,320,1,0.007235199958086014
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,384,4,0.004339199885725975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,384,64,0.036406400799751285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,320,16,0.012831999361515046
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,384,2,0.004006399959325791
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,320,4,0.007923199981451034
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,320,8,0.0086496002972126
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,384,16,0.01297599971294403
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,384,1,0.00416640006005764
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,320,2,0.007612799853086471
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,384,1,0.007491199672222138
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,512,128,0.03366400003433227
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,384,8,0.008633600175380706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,512,64,0.018991999328136444
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,512,64,0.047091200947761536
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,512,32,0.009683199971914292
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,512,16,0.007014399766921997
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,512,8,0.00517439991235733
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,512,4,0.004467200115323066
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,384,4,0.008364800363779068
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,512,2,0.004214400053024292
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,512,1,0.004134399816393852
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,512,1,0.009935999661684037
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,384,2,0.007936000078916549
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,768,128,0.04656960070133209
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,768,128,0.12646399736404418
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,768,64,0.02646079957485199
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,768,64,0.06665599942207337
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,512,128,0.08824959993362427
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,768,32,0.013795199990272521
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,768,32,0.035180801153182985
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,512,32,0.024889600276947022
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,768,16,0.00833280012011528
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,512,16,0.014985600113868713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,768,8,0.006086400151252747
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,768,8,0.012883199751377106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,768,4,0.004879999905824661
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,768,2,0.0048096001148223875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,768,1,0.004134399816393852
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1024,128,0.0596448004245758
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1024,128,0.16530560255050658
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,512,8,0.010742399841547012
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1024,64,0.033641600608825685
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,512,4,0.008908800035715102
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,512,2,0.008044800162315369
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1024,64,0.08631680011749268
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1024,32,0.019049599766731262
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,768,16,0.01963520050048828
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1024,16,0.009721600264310837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1024,8,0.0069311998784542085
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1024,4,0.005296000093221664
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1024,2,0.004566400125622749
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,768,2,0.008326400071382523
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1024,32,0.04574080109596253
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,768,4,0.00891520008444786
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,768,1,0.00764160007238388
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1024,16,0.02476159930229187
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1024,1,0.004812800139188766
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1024,8,0.014591999351978302
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1536,128,0.08746240139007569
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1536,128,0.24225280284881592
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1024,4,0.010489600151777268
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1536,64,0.046623998880386354
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1536,32,0.025724801421165466
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1024,2,0.008739200234413148
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1536,32,0.06607360243797303
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1536,16,0.01401280015707016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1536,16,0.03579840064048767
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1536,8,0.00827839970588684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1024,1,0.008057600259780884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1536,4,0.008019199967384339
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1536,64,0.12396800518035889
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1536,2,0.004886399954557419
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,1536,1,0.004403200000524521
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1536,8,0.019468800723552705
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2048,128,0.11425280570983887
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2048,128,0.32052481174468994
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2048,64,0.05835199952125549
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1536,4,0.012428800016641617
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2048,64,0.16402239799499513
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2048,32,0.0318336009979248
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2048,32,0.08623039722442627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2048,16,0.019200000166893005
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2048,8,0.009443199634552002
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1536,2,0.0088639996945858
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2048,4,0.007568000257015229
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,1536,1,0.008185599744319916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2048,2,0.005104000121355057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,2048,1,0.004659200087189674
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,3072,128,0.1774943947792053
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2048,16,0.04534080028533936
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,3072,128,0.47220478057861326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,3072,64,0.08461440205574036
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,3072,64,0.24014720916748047
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,3072,32,0.04541760087013245
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,3072,32,0.12372159957885742
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,3072,16,0.024963200092315674
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,3072,8,0.014057600498199463
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,3072,8,0.03400320112705231
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,3072,4,0.00833280012011528
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2048,8,0.02459840029478073
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,3072,2,0.006015999987721443
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,3072,2,0.013055999577045441
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2048,4,0.014953599870204925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2048,2,0.010127999633550645
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,2048,1,0.008687999844551087
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,3072,1,0.006252799928188324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,3072,16,0.06453760266304016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4096,128,0.2405535936355591
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4096,128,0.6248672008514404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4096,64,0.11009600162506103
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4096,32,0.05810239911079407
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4096,32,0.16164159774780273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4096,16,0.031836798787117
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4096,16,0.08510079979896545
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4096,8,0.017385600507259368
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4096,8,0.04505920112133026
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4096,4,0.00936639979481697
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4096,2,0.007103999704122543
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,4096,1,0.0062304001301527025
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4096,2,0.015072000026702882
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,6144,128,0.35266880989074706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,6144,64,0.1640128016471863
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,6144,128,0.918131160736084
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,6144,64,0.4660672187805176
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,6144,32,0.08277119994163513
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,3072,4,0.019734400510787963
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4096,64,0.31604480743408203
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,6144,16,0.047753599286079404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,6144,16,0.12373440265655518
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,6144,8,0.024710400402545928
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,6144,8,0.06440320014953613
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,6144,4,0.013820800185203552
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,6144,4,0.03409920036792755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,3072,1,0.008591999858617782
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,6144,2,0.008399999886751174
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4096,4,0.023996800184249878
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,6144,1,0.009491200000047684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8192,128,0.47884478569030764
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8192,64,0.21654078960418702
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8192,128,1.2099424362182618
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8192,64,0.6181759834289551
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8192,32,0.10863679647445679
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8192,16,0.05758399963378906
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8192,16,0.16316479444503784
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8192,8,0.033766400814056394
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8192,4,0.017820799350738527
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8192,2,0.009964799880981446
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,4096,1,0.010041599720716476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,6144,2,0.01912959963083267
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,float16,8192,1,0.006892800331115723
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,6144,1,0.01233920007944107
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,6144,32,0.23796479701995848
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8192,32,0.31604800224304197
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8192,4,0.04482559859752655
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8192,1,0.01419840008020401
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8192,8,0.08376960158348083
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_pre,default,fp8,8192,2,0.02446720004081726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1,128,0.005750399827957153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1,64,0.005023999884724617
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1,128,0.011718399822711945
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1,32,0.004502400010824204
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1,64,0.009216000139713288
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1,32,0.00846719965338707
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1,8,0.006012799963355064
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1,16,0.004153599962592125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1,4,0.006473600119352341
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1,16,0.007718399912118912
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1,2,0.004227200150489807
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1,8,0.007286400347948074
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1,4,0.006137600168585777
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1,1,0.004755200073122978
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1,2,0.006364800035953522
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2,128,0.005859199911355972
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2,64,0.0052767999470233916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2,128,0.01241919994354248
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2,64,0.009564799815416336
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1,1,0.006003199890255928
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2,4,0.0044319998472929
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2,4,0.006412799656391144
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2,32,0.004550400003790855
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2,2,0.004476799815893173
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2,1,0.0041600000113248825
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2,8,0.005305600166320801
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2,16,0.004502400010824204
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4,128,0.005459199845790863
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4,64,0.005593600124120713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4,32,0.009055999666452407
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2,32,0.008355200290679932
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4,16,0.006787200272083282
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4,16,0.008191999793052674
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2,16,0.007391999661922455
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2,8,0.00697920024394989
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4,8,0.005091200023889542
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2,2,0.006700800359249115
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4,128,0.012150400131940842
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4,64,0.009382399916648864
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4,2,0.004604800045490265
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4,1,0.004300799965858459
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2,1,0.006371200084686279
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4,4,0.004323200136423111
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4,32,0.0079584002494812
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4,1,0.006563200056552887
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8,128,0.005612799897789955
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8,128,0.014076800644397735
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4,8,0.006563200056552887
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8,64,0.0045311998575925825
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8,16,0.007113599777221679
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4,4,0.006812799721956253
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8,32,0.007571200281381607
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4,2,0.006080000102519989
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8,16,0.004867200180888176
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8,8,0.0074432000517845156
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8,8,0.006774400174617767
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8,64,0.009491200000047684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8,4,0.004361600056290627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8,4,0.0069920003414154054
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8,32,0.008166400343179702
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8,2,0.00421760007739067
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16,64,0.011046399921178817
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8,1,0.009116800129413604
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16,128,0.005817599967122078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16,16,0.004358400031924248
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16,128,0.014556799829006196
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16,64,0.004700800031423568
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16,32,0.004447999969124794
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16,32,0.008710400015115739
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8,2,0.006764800101518631
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16,8,0.004892800003290176
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8,1,0.006540799885988236
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16,4,0.0047775998711586
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16,2,0.0043136000633239744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16,2,0.00687360018491745
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16,1,0.006892800331115723
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16,1,0.006585600227117539
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16,16,0.007753600180149078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,32,128,0.006032000109553337
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,32,64,0.005052800104022026
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,32,32,0.009929600358009338
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16,8,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16,4,0.006921599805355072
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,32,32,0.004575999826192856
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,32,16,0.004473600164055824
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,32,8,0.004374400153756142
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,32,8,0.007727999985218048
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,32,128,0.017088000476360322
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,32,4,0.00469760000705719
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,32,64,0.011884800344705581
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,32,4,0.0072672002017498015
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,32,2,0.004239999875426293
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,32,16,0.008083199709653854
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,32,1,0.006012799963355064
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,48,32,0.00469760000705719
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,48,128,0.006601600348949433
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,48,64,0.005193600058555603
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,32,2,0.007158400118350982
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,48,8,0.004588799923658371
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,48,32,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,32,1,0.006876800209283829
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,48,16,0.004543999955058098
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,48,128,0.019776000082492827
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,48,64,0.01419519931077957
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,48,4,0.004361600056290627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,48,2,0.004361600056290627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,48,16,0.008771199733018875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,48,1,0.004300799965858459
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,48,1,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,48,8,0.008083199709653854
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,64,128,0.007116799801588058
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,64,128,0.022809599339962006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,48,4,0.007715199887752533
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,64,16,0.009958399832248688
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,64,64,0.005225599929690361
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,64,8,0.004745600000023842
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,64,64,0.014681600034236908
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,64,4,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,64,32,0.0047680001705884935
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,64,32,0.011033599823713302
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,48,2,0.007478400319814682
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,64,16,0.0046720001846551895
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,64,4,0.004220800101757049
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,64,2,0.005481600016355515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,64,1,0.0046431999653577805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,64,8,0.008310399949550629
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,80,32,0.00485760010778904
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,80,128,0.007519999891519547
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,80,64,0.005449600145220756
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,64,2,0.007612799853086471
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,64,1,0.007411199808120728
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,80,16,0.00549440011382103
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,80,4,0.007603199779987335
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,80,8,0.004332799836993218
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,80,2,0.0071552000939846035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,80,64,0.01668799966573715
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,80,128,0.026748800277709962
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,80,32,0.011923199892044068
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,80,1,0.004332799836993218
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,96,128,0.007939200103282928
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,80,16,0.010102400183677673
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,96,64,0.00562559999525547
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,80,8,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,96,32,0.004992000013589859
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,80,4,0.0075935997068881985
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,96,16,0.010739199817180634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,96,16,0.004726399853825569
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,96,128,0.029452800750732422
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,96,64,0.01800000071525574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,80,2,0.007372800260782242
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,96,8,0.0044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,96,4,0.004377600178122521
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,80,1,0.007203199714422226
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,96,2,0.004287999868392944
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,96,32,0.012291199713945388
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,96,2,0.007727999985218048
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,96,1,0.004265600070357323
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,128,128,0.008918400108814239
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,96,8,0.008739200234413148
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,128,128,0.03535360097885132
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,96,1,0.007305599749088287
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,128,64,0.006121600046753883
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,96,4,0.008032000064849854
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,128,32,0.004931199923157692
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,128,16,0.00485760010778904
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,128,8,0.004569600149989128
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,128,64,0.020556800067424774
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,128,4,0.005238400027155876
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,128,4,0.00817599967122078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,128,2,0.004348799958825112
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,128,1,0.005593600124120713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,128,32,0.01388159990310669
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,160,128,0.009971199929714203
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,160,64,0.006540799885988236
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,128,16,0.010976000130176545
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,160,32,0.005161599814891815
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,128,8,0.009411200135946273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,160,16,0.004956800118088722
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,160,8,0.004611200094223023
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,128,2,0.00793280005455017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,128,1,0.007465600222349167
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,160,4,0.004332799836993218
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,160,128,0.044860801100730895
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,160,2,0.00424639992415905
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,160,1,0.004403200000524521
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,160,1,0.007574400305747986
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,192,128,0.012319999933242797
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,192,128,0.04932479858398438
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,160,8,0.010332799702882766
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,192,64,0.006966400146484375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,160,4,0.008697599917650223
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,192,32,0.005615999922156334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,160,2,0.007974400371313094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,160,64,0.024281600117683412
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,160,32,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,160,16,0.011644800007343293
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,192,16,0.004841599985957146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,192,8,0.004444799944758415
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,192,4,0.004278400167822838
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,192,32,0.017030400037765504
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,192,64,0.026659199595451356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,192,4,0.008630400151014328
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,192,2,0.00432640016078949
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,192,2,0.010425599664449692
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,192,1,0.004169600084424019
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,192,16,0.012492799758911132
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,192,1,0.007760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,256,128,0.018620799481868743
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,192,8,0.01064639985561371
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,256,16,0.004873599857091904
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,256,64,0.0075552001595497135
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,256,8,0.005283199995756149
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,256,32,0.0059935998171567915
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,256,8,0.010899200290441512
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,256,128,0.05987200140953064
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,256,4,0.004534399881958961
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,256,2,0.005212799832224846
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,256,64,0.03229759931564331
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,256,2,0.008726400136947633
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,256,32,0.01934400051832199
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,320,64,0.008367999643087386
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,256,1,0.0044319998472929
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,320,32,0.006611199676990509
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,256,16,0.013817599415779114
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,320,128,0.02202879935503006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,256,4,0.00936639979481697
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,320,64,0.04267840087413788
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,256,1,0.007852800190448761
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,320,128,0.07520639896392822
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,320,16,0.005142400041222572
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,320,8,0.007833600044250488
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,320,2,0.00910400003194809
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,320,4,0.004646399989724159
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,320,4,0.010054399818181991
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,320,2,0.00432640016078949
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,320,1,0.004527999833226204
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,384,128,0.024633599817752837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,320,32,0.02303680032491684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,320,1,0.007936000078916549
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,384,32,0.006524799764156342
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,320,16,0.015574400126934052
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,384,64,0.009145600348711013
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,320,8,0.01135680004954338
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,384,32,0.0255295991897583
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,384,128,0.08648319840431214
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,384,4,0.010592000186443329
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,384,16,0.01668799966573715
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,384,16,0.00589120015501976
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,384,1,0.007334399968385696
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,384,64,0.04776960015296936
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,384,8,0.005033599957823753
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,384,4,0.004681599885225296
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,384,2,0.006297600269317627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,384,2,0.008912000060081481
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,384,8,0.012003199756145477
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,384,1,0.008224000036716462
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,512,16,0.00560000017285347
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,512,128,0.030527999997138976
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,512,128,0.1115839958190918
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,512,64,0.05876799821853638
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,512,64,0.015750400722026825
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,512,32,0.007942400127649307
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,512,32,0.031068798899650574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,512,8,0.004899200052022934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,512,4,0.004732799902558327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,512,4,0.010911999642848969
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,512,2,0.004560000076889992
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,512,1,0.006636799871921539
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,512,16,0.019462400674819948
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,512,8,0.013548800349235534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,768,128,0.04198080003261566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,768,128,0.16189759969711304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,768,64,0.02328319996595383
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,512,1,0.008188799768686295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,768,32,0.009471999853849411
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,768,16,0.006473600119352341
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,512,2,0.009833600372076035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,768,8,0.005392000079154968
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,768,64,0.08447359800338745
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,768,4,0.004822399839758873
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,768,2,0.006355199962854385
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,768,32,0.046003198623657225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,768,1,0.004422400146722794
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1024,128,0.05403839945793152
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,768,1,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1024,128,0.21120638847351075
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,768,2,0.010332799702882766
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1024,64,0.028988799452781676
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,768,8,0.016383999586105348
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,768,16,0.025328001379966734
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1024,32,0.012115199863910676
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,768,4,0.01175680011510849
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1024,32,0.057980799674987794
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1024,16,0.007900799810886382
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1024,8,0.005564799904823304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1024,4,0.004812800139188766
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1024,16,0.030326399207115173
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1024,1,0.004441599920392036
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1024,4,0.013312000036239623
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1024,2,0.00493439994752407
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1024,2,0.01069760024547577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1536,128,0.07492160201072692
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1024,64,0.1105504035949707
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1024,8,0.0191103994846344
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1536,128,0.310479998588562
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1536,64,0.041433599591255185
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1536,64,0.15934400558471679
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1536,32,0.021510399878025055
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1536,16,0.009139200299978256
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1536,8,0.007091200351715088
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1536,4,0.0052512001246213915
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1024,1,0.009353599697351455
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1536,2,0.00469760000705719
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,1536,1,0.004534399881958961
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1536,16,0.045228800177574156
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1536,1,0.010051199793815612
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2048,128,0.09759680032730103
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2048,128,0.408406400680542
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2048,64,0.05189120173454285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1536,32,0.08347839713096619
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2048,32,0.02810240089893341
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2048,32,0.10855040550231934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2048,16,0.011270400136709213
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2048,8,0.007756800204515457
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1536,8,0.024700799584388734
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1536,4,0.016195200383663177
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2048,2,0.0049183998256921765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,1536,2,0.01178240031003952
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2048,4,0.005868799984455109
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2048,64,0.2091775894165039
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,2048,1,0.004720000177621841
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,3072,128,0.14302719831466676
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2048,8,0.030019199848175047
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,3072,64,0.07486720085144043
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,3072,64,0.30882558822631834
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2048,4,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,3072,32,0.04079360067844391
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,3072,16,0.021587200462818146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,3072,8,0.009612800180912017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2048,16,0.057411199808120726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2048,2,0.013308799266815186
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,3072,4,0.0064351998269557955
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,2048,1,0.010486400127410889
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,3072,128,0.6081664085388183
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,3072,2,0.005385600030422211
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,3072,32,0.15813440084457397
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,3072,1,0.004927999898791313
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,3072,16,0.0827679991722107
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,3072,8,0.045484799146652224
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4096,128,0.1918879985809326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4096,64,0.408406400680542
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4096,64,0.09450240135192871
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4096,32,0.050732797384262084
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4096,16,0.027340799570083618
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4096,8,0.011401599645614624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4096,128,0.8089759826660157
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4096,4,0.007715199887752533
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,3072,2,0.01605439931154251
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4096,2,0.005689600110054016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4096,2,0.01884160041809082
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4096,32,0.20832319259643556
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,3072,1,0.01188800036907196
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,4096,1,0.004844800010323525
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4096,16,0.1075551986694336
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,6144,128,0.27990720272064207
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4096,8,0.056720000505447385
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,6144,64,0.6057216167449951
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,6144,128,1.1975008010864259
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,6144,64,0.13844480514526367
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,6144,32,0.07736319899559022
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,6144,32,0.30830719470977785
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4096,1,0.012883199751377106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,3072,4,0.024076800048351287
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,6144,16,0.03965120017528534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,4096,4,0.02993920147418976
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,6144,8,0.022809599339962006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,6144,4,0.009187199920415879
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,6144,2,0.00618240013718605
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,6144,1,0.0054303999990224835
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8192,128,0.37567040920257566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,6144,16,0.15739840269088745
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8192,64,0.1807935953140259
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,6144,8,0.08223680257797242
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8192,64,0.8020544052124023
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8192,128,1.5891231536865233
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8192,32,0.09380159974098205
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8192,8,0.028832000494003297
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,6144,4,0.044854399561882016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8192,32,0.40711040496826173
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8192,16,0.050595200061798094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8192,16,0.20697920322418212
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8192,4,0.011510399729013443
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8192,4,0.05663679838180542
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8192,2,0.007795199751853943
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,8192,1,0.0055263999849557875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,12288,128,0.5669375896453858
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,12288,64,0.27959039211273196
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,12288,32,0.13706560134887696
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,12288,128,2.366044807434082
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,12288,64,1.1945792198181153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8192,8,0.10704319477081299
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,12288,32,0.6055583953857422
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,6144,1,0.015855999290943147
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,12288,16,0.07784640192985534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,12288,16,0.3058464050292969
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,6144,2,0.02417600005865097
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,12288,8,0.04160319864749908
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,12288,8,0.1577247977256775
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,12288,4,0.02279040068387985
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,12288,2,0.00920960009098053
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8192,2,0.029468798637390138
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,12288,1,0.006224000081419945
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,8192,1,0.018694399297237395
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16384,128,0.7274208068847656
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16384,64,0.3516031980514526
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16384,32,0.19270720481872558
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16384,64,1.5851840019226073
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16384,16,0.09298239946365357
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16384,128,3.1499040603637694
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16384,16,0.40636157989501953
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16384,8,0.05061119794845581
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16384,4,0.027344000339508057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16384,8,0.20836160182952881
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16384,2,0.011519999802112579
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16384,2,0.05644479990005493
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,16384,1,0.007545600086450577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,12288,4,0.08206719756126404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,12288,2,0.04446400105953217
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,20480,128,0.9256128311157227
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,20480,64,0.4666175842285156
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,20480,32,0.22131519317626952
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,20480,64,1.9744607925415039
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,20480,128,3.9252254486083986
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,20480,16,0.12383359670639038
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,20480,32,0.9958880424499512
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,20480,16,0.5036223888397217
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,20480,8,0.06449919939041138
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,20480,8,0.2572511911392212
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,20480,4,0.035011199116706845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16384,32,0.8011391639709473
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,20480,4,0.13220479488372802
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,20480,2,0.017395199835300447
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,20480,2,0.07195199728012085
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,float16,20480,1,0.008380799740552902
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16384,4,0.10694719552993774
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,12288,1,0.024259200692176817
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,16384,1,0.02961280047893524
TRTLLM,1.2.0rc5,NVIDIA H200,mla_gen_post,default,fp8,20480,1,0.03802559971809387
