framework,version,device,op_name,kernel_source,bmm_dtype,num_tokens,num_heads,latency
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1,128,0.009600000083446502
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1,128,0.016230399906635284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1,64,0.006639999896287918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1,64,0.014815999567508698
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1,4,0.006691200286149978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1,2,0.004204799979925155
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1,32,0.005212799832224846
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1,8,0.00414079986512661
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1,16,0.00525440014898777
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1,4,0.004153599962592125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1,1,0.0044096000492572784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2,128,0.0059424001723527905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1,16,0.007132799923419952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1,8,0.006400000303983688
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1,32,0.008236800134181977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2,64,0.004848000034689904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1,2,0.006355199962854385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2,128,0.0164000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2,32,0.004377600178122521
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1,1,0.0056928001344203946
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2,32,0.008172799646854401
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2,8,0.005731200054287911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2,16,0.005065599828958512
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2,64,0.011072000116109848
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2,4,0.003974400088191032
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2,16,0.007503999769687653
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2,2,0.006304000318050384
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2,1,0.004540799930691719
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2,8,0.006672000139951706
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2,2,0.004153599962592125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2,4,0.00663359984755516
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4,128,0.005999999865889549
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4,64,0.005152000114321709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4,32,0.008550400286912918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4,8,0.004902400076389313
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4,32,0.004316800087690353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4,16,0.004220800101757049
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4,16,0.0075552001595497135
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2,1,0.005923200026154518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4,128,0.016755199432373045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4,4,0.004927999898791313
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4,8,0.006963200122117996
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4,64,0.011247999966144562
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4,2,0.003929600119590759
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8,128,0.005894400179386139
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4,1,0.00697920024394989
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4,4,0.006908799707889557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8,64,0.005046400055289268
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8,32,0.004950400069355964
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4,2,0.006403200328350067
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4,1,0.005920000001788139
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8,8,0.005289600044488907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8,128,0.017209599912166595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8,64,0.011577600240707397
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8,16,0.004051199927926063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8,4,0.006752000004053116
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8,4,0.004473600164055824
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8,32,0.008694399893283845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8,2,0.004214400053024292
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8,8,0.007075200229883194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8,1,0.00483199991285801
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8,16,0.00769599974155426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,16,128,0.006025600060820579
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,16,64,0.0051231998950243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8,2,0.006518399715423584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8,1,0.0063231997191905975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,16,32,0.004470400139689445
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,16,32,0.00894080027937889
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,16,16,0.004383999854326248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,16,128,0.017836800217628478
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,16,64,0.011667200177907944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,16,4,0.004102399945259095
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,16,8,0.004016000032424927
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,16,2,0.004262400045990944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,16,16,0.007868800312280655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,16,1,0.004310400038957596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,16,8,0.00724480003118515
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,16,4,0.00687360018491745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,32,128,0.006348799914121628
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,32,128,0.0200095996260643
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,32,64,0.005193600058555603
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,16,1,0.00647680014371872
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,32,32,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,32,32,0.0045471999794244765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,16,2,0.006457599997520447
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,32,64,0.012355200201272964
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,32,16,0.004239999875426293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,32,16,0.008006399869918824
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,32,8,0.004259200021624565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,32,8,0.007497599720954895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,32,2,0.005654399842023849
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,32,4,0.004220800101757049
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,32,4,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,32,2,0.007052800059318543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,32,1,0.003920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,32,1,0.006636799871921539
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,48,128,0.007039999961853028
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,48,64,0.005593600124120713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,48,128,0.021456000208854676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,48,64,0.01342719942331314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,48,32,0.004691199958324432
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,48,16,0.004259200021624565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,48,16,0.008636800199747085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,48,4,0.0042304001748561856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,48,8,0.004748800024390221
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,48,4,0.007603199779987335
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,48,2,0.007321599870920181
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,48,2,0.004080000147223472
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,48,32,0.00979520007967949
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,48,1,0.0042304001748561856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,48,8,0.007651200145483017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,64,64,0.005827200040221215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,48,1,0.007129599899053573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,64,128,0.007644800096750259
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,64,32,0.004694399982690811
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,64,64,0.014745600521564484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,64,16,0.004889599978923798
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,64,128,0.022441600263118745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,64,2,0.004163200035691261
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,64,32,0.009945599734783173
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,64,8,0.004291199892759323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,64,8,0.008185599744319916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,64,4,0.007760000228881836
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,64,4,0.004095999896526337
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,64,1,0.0040224000811576845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,64,16,0.00894080027937889
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,64,1,0.006924799829721451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,80,128,0.008076799660921096
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,80,32,0.005158400163054466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,80,64,0.006143999844789505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,64,2,0.007388799637556076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,80,8,0.0043519999831914905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,80,16,0.00459199994802475
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,80,128,0.02505280077457428
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,80,2,0.004310400038957596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,80,4,0.004582399874925614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,80,64,0.015510399639606477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,80,32,0.010691200196743012
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,80,1,0.005849599838256836
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,80,16,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,80,4,0.007526399940252304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,96,128,0.008620800077915191
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,80,8,0.00796160027384758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,96,32,0.0052767999470233916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,96,64,0.006428799778223038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,96,16,0.004716800153255462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,80,2,0.007283200323581695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,80,1,0.0070720002055168155
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,96,128,0.025935998558998107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,96,64,0.016448000073432924
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,96,8,0.004396799951791763
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,96,32,0.010755199939012527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,96,16,0.009334400296211243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,96,8,0.008262400329113007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,96,4,0.004412800073623657
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,96,2,0.003920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,96,1,0.0049727998673915865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,128,128,0.010486400127410889
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,128,64,0.007161600142717361
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,96,4,0.007894399762153625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,128,32,0.005670399963855743
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,96,2,0.007651200145483017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,96,1,0.00719040036201477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,128,64,0.016790400445461272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,128,128,0.028527998924255372
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,128,16,0.0046847999095916745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,128,4,0.0042559999972581865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,128,8,0.004492799937725067
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,128,2,0.004329600185155868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,128,2,0.007750400155782699
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,128,16,0.009609600156545639
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,128,1,0.004003199934959412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,128,32,0.012009599804878235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,128,4,0.007846400141716003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,128,8,0.008524800091981888
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,160,128,0.012505599856376648
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,160,128,0.042335999011993405
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,160,64,0.008099199831485748
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,160,32,0.006319999694824219
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,160,64,0.02295999974012375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,128,1,0.007273600250482559
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,160,32,0.014880000054836274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,160,4,0.006777600198984146
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,160,8,0.00456320010125637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,160,2,0.004438399896025657
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,160,16,0.005027199909090996
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,160,1,0.007366400212049484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,192,64,0.008591999858617782
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,192,128,0.01719679981470108
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,160,2,0.007974400371313094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,160,16,0.010214400291442872
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,160,8,0.009161599725484849
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,160,4,0.007987199723720551
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,160,1,0.007417599856853485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,192,32,0.006361600011587143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,192,32,0.015574400126934052
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,192,16,0.00498879998922348
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,192,8,0.004675199836492538
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,192,128,0.045449599623680115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,192,4,0.005241600051522255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,192,16,0.010675200074911118
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,192,64,0.024691200256347655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,192,2,0.004278400167822838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,192,8,0.00915839970111847
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,192,1,0.007516799867153168
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,192,4,0.007999999821186066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,192,1,0.004383999854326248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,256,128,0.022047999501228332
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,192,2,0.007699199765920639
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,256,64,0.00976639986038208
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,256,32,0.0069023996591567995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,256,32,0.01615999937057495
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,256,16,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,256,64,0.02698880136013031
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,256,8,0.004620800167322159
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,256,4,0.00456320010125637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,256,16,0.005446400120854377
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,256,128,0.05045440196990967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,256,4,0.00806720033288002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,256,8,0.009427200257778167
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,256,2,0.00806720033288002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,256,2,0.004543999955058098
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,256,1,0.00411520004272461
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,320,128,0.0679423987865448
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,256,1,0.007609599828720092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,320,64,0.011692799627780914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,320,128,0.024163199961185454
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,320,32,0.007331199944019318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,320,16,0.005731200054287911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,320,8,0.005075199902057648
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,320,4,0.004515200108289719
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,320,32,0.0204352006316185
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,320,64,0.03580799996852875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,320,2,0.004377600178122521
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,320,16,0.012956799566745758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,320,1,0.0042015999555587765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,320,1,0.007702399790287018
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,384,128,0.027718400955200194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,320,8,0.009097599983215332
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,384,64,0.013948799669742584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,320,4,0.00827839970588684
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,384,16,0.013414399325847625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,384,64,0.03779839873313904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,320,2,0.007910399883985519
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,384,16,0.006089600175619126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,384,128,0.07030720114707947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,384,32,0.008380799740552902
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,384,8,0.004982399940490723
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,384,4,0.004659200087189674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,384,2,0.004137599840760231
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,384,1,0.0044319998472929
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,384,32,0.021132799983024596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,384,1,0.0077344000339508055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,384,8,0.009375999867916106
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,384,4,0.008550400286912918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,512,32,0.025596800446510314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,512,128,0.03368319869041443
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,512,128,0.09082239866256714
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,384,2,0.00793280005455017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,512,64,0.04776639938354492
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,512,64,0.019859200716018675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,512,32,0.00971520021557808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,512,16,0.006867200136184692
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,512,4,0.004636799916625023
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,512,8,0.0054687999188899996
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,512,2,0.0045855998992919925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,512,16,0.015398399531841278
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,512,1,0.0042975999414920805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,512,1,0.007468800246715546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,768,64,0.02620159983634949
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,768,128,0.04630399942398071
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,768,32,0.036236798763275145
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,512,8,0.0107744000852108
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,512,2,0.008252800256013871
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,512,4,0.00891520008444786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,768,32,0.01385599970817566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,768,16,0.02034880071878433
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,768,16,0.008390399813652038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,768,128,0.12981760501861572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,768,2,0.004518400132656098
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,768,64,0.0686240017414093
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,768,8,0.006095999851822853
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,768,4,0.004931199923157692
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,768,1,0.00451200008392334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1024,128,0.06000959873199463
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1024,128,0.16960320472717286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,768,8,0.013174399733543396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1024,64,0.03272640109062195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,768,4,0.008918400108814239
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,768,1,0.008105599880218506
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1024,32,0.01889919936656952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1024,32,0.04679679870605469
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1024,16,0.009676799923181535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1024,64,0.0884768009185791
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1024,16,0.025283199548721314
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,768,2,0.008691199868917466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1024,8,0.006735999882221222
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1024,4,0.005375999957323074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1024,2,0.004771199822425842
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1024,8,0.015292799472808838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1024,1,0.004403200000524521
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1536,128,0.08702719807624817
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1536,64,0.12814079523086547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1536,64,0.04594559967517853
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1536,32,0.06722239851951599
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1536,16,0.01361600011587143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1536,32,0.024876800179481507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1024,2,0.009078399837017059
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1024,4,0.010902400314807891
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1024,1,0.008310399949550629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1536,8,0.01988160014152527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1536,128,0.24653759002685546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1536,8,0.00830719992518425
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1536,1,0.0046847999095916745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1536,4,0.006118400022387505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,1536,2,0.005027199909090996
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1536,16,0.035872000455856326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2048,128,0.32671360969543456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2048,128,0.11370240449905396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2048,64,0.05872640013694763
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2048,32,0.08742079734802247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1536,4,0.013104000687599182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2048,64,0.16840959787368776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2048,32,0.03203519880771637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2048,8,0.009769599884748459
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2048,16,0.017136000096797943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1536,1,0.008348800241947174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2048,16,0.046598398685455324
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2048,2,0.005315199866890908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2048,4,0.006889600306749344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,1536,2,0.009171199798583985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,3072,128,0.1705631971359253
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,2048,1,0.004694399982690811
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2048,8,0.024928000569343568
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,3072,128,0.4816703796386719
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2048,4,0.015321600437164306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,3072,64,0.24285440444946288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,3072,32,0.0448415994644165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,3072,8,0.013782399892807006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,3072,16,0.02446399927139282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,3072,32,0.12599040269851686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,3072,64,0.08415039777755737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2048,2,0.0108255997300148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,3072,16,0.0664192020893097
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,3072,4,0.008303999900817871
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,2048,1,0.009040000289678574
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4096,128,0.2381216049194336
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,3072,4,0.020214399695396422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,3072,2,0.006217600032687187
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,3072,1,0.004879999905824661
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4096,64,0.1095039963722229
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4096,128,0.6285727977752685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,3072,8,0.035036799311637876
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,3072,2,0.01316480040550232
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4096,64,0.32146880626678465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4096,32,0.05740479826927185
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4096,16,0.031667199730873105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4096,32,0.16565760374069213
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4096,8,0.01693120002746582
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4096,16,0.08664320111274719
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4096,2,0.006950400024652481
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,3072,1,0.008928000181913375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4096,4,0.009612800180912017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,6144,128,0.3609215974807739
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4096,2,0.014921599626541137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,4096,1,0.005299200117588043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,6144,128,0.9321215629577637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,6144,64,0.16776959896087645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,6144,64,0.47138237953186035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4096,8,0.04608319997787476
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4096,4,0.02481600046157837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,6144,32,0.08247680068016053
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,6144,8,0.02655999958515167
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,6144,16,0.044223999977111815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,6144,8,0.06615679860115051
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,6144,2,0.008326400071382523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,4096,1,0.010924799740314484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,6144,4,0.013699199259281158
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,6144,32,0.24189119338989257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,6144,16,0.12602880001068115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,6144,1,0.006438399851322174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8192,128,0.47571840286254885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,6144,4,0.035020801424980166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8192,128,1.2094976425170898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8192,32,0.10873600244522094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8192,64,0.21502718925476075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,6144,2,0.020070399343967437
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8192,64,0.6274240016937256
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,6144,1,0.012928000092506409
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8192,8,0.031206399202346802
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8192,16,0.05634239912033081
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8192,32,0.31874880790710447
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8192,16,0.16436480283737182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8192,8,0.08561919927597046
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8192,4,0.016700799763202667
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8192,2,0.009532800316810608
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8192,4,0.046515199542045596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,float16,8192,1,0.007116799801588058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8192,1,0.015091200172901154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_pre,default,fp8,8192,2,0.024707199633121492
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1,128,0.008313599973917007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1,64,0.005683200061321258
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1,128,0.011715199798345566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1,32,0.004377600178122521
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1,64,0.009993600100278855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1,8,0.0044064000248909
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1,2,0.004291199892759323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1,32,0.008140800148248672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1,16,0.00785600021481514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1,16,0.004236799851059914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1,1,0.004521600157022476
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1,4,0.004310400038957596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2,128,0.005375999957323074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1,4,0.006406400352716446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1,2,0.006377600133419037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2,64,0.005260799825191498
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1,8,0.006726399809122085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2,32,0.0047231998294591905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2,128,0.01175680011510849
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2,16,0.004438399896025657
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1,1,0.005920000001788139
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2,64,0.00926079973578453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2,32,0.008220800012350083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2,4,0.0042975999414920805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2,8,0.0041184000670909885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2,16,0.007983999699354172
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2,1,0.005398400127887726
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2,2,0.004447999969124794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2,8,0.0072223998606204985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2,4,0.0069760002195835115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4,128,0.005497600138187409
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4,64,0.005529600009322166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2,2,0.0065151996910572055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2,1,0.006345599889755249
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4,32,0.0047520000487566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4,16,0.004291199892759323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4,8,0.004294399917125702
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4,128,0.012374400347471236
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4,64,0.009721600264310837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4,16,0.007875200361013412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4,32,0.008310399949550629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4,4,0.004239999875426293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4,2,0.004441599920392036
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4,1,0.004307200014591217
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4,8,0.007305599749088287
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8,128,0.005731200054287911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4,4,0.006908799707889557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8,64,0.004931199923157692
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8,64,0.009935999661684037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4,1,0.006585600227117539
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4,2,0.006828799843788147
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8,128,0.013504000008106231
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8,16,0.004358400031924248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8,32,0.00456320010125637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8,32,0.008339200168848038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8,8,0.0042559999972581865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8,4,0.0043136000633239744
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8,2,0.004275200143456459
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8,1,0.004956800118088722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8,1,0.0064800001680850984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8,16,0.007993599772453308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16,128,0.0059264000505208966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8,8,0.007119999825954437
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8,2,0.006691200286149978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16,64,0.005008000135421753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16,64,0.011264000087976456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8,4,0.006966400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16,32,0.004502400010824204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16,16,0.0043935999274253845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16,8,0.005014400184154511
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16,128,0.01451520025730133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16,32,0.009027200192213059
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16,2,0.004278400167822838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16,16,0.007648000121116638
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16,4,0.005183999985456466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16,2,0.006857600063085556
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16,1,0.004316800087690353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16,8,0.0073183998465538025
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16,4,0.007238399982452392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,32,128,0.006294400244951248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,32,128,0.01722240000963211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,32,32,0.004873599857091904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,32,32,0.010076799988746643
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,32,64,0.005488000065088272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16,1,0.006876800209283829
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,32,16,0.0045855998992919925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,32,16,0.008284799754619598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,32,8,0.004195199906826019
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,32,8,0.007910399883985519
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,32,4,0.004524800181388855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,32,4,0.008038400113582611
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,32,64,0.01218239963054657
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,32,1,0.004281599819660187
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,32,2,0.004985599964857102
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,32,1,0.007612799853086471
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,32,2,0.007180800288915634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,48,64,0.005542400106787681
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,48,128,0.006703999638557434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,48,32,0.004816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,48,8,0.005296000093221664
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,48,16,0.004534399881958961
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,48,32,0.010883200168609618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,48,16,0.008854400366544724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,48,64,0.014131200313568116
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,48,8,0.008595199882984161
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,48,4,0.0078015998005867004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,48,128,0.02035840004682541
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,48,4,0.0044319998472929
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,48,2,0.004185599833726883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,48,2,0.00759039968252182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,48,1,0.004278400167822838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,64,128,0.023004800081253052
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,64,128,0.006988800317049027
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,64,32,0.0048191998153924945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,64,64,0.0054687999188899996
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,64,16,0.010342399775981902
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,64,32,0.011327999830245971
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,64,16,0.004716800153255462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,64,8,0.004726399853825569
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,64,4,0.008115199953317642
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,64,8,0.008470399677753449
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,64,64,0.015513600409030914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,64,4,0.004416000097990036
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,48,1,0.007225599884986877
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,64,2,0.00448639988899231
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,64,2,0.007942400127649307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,64,1,0.004438399896025657
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,80,128,0.007254400104284286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,80,64,0.00578560009598732
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,80,32,0.0049183998256921765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,80,16,0.004937599971890449
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,64,1,0.007891199737787246
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,80,8,0.005958399921655655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,80,4,0.004572800174355507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,80,64,0.017638400197029114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,80,2,0.004495999962091446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,80,32,0.01213119998574257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,80,16,0.010201600193977357
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,80,128,0.02748799920082092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,80,1,0.004787199944257736
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,80,8,0.008876799792051315
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,80,4,0.0081216000020504
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,96,128,0.008243200182914735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,96,64,0.005836800113320351
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,96,64,0.0190528005361557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,80,2,0.00809279978275299
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,96,8,0.0049056001007556915
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,96,16,0.004623999819159508
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,96,32,0.0050016000866889955
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,96,128,0.03080959916114807
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,80,1,0.007433599978685379
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,96,4,0.0049472000449895855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,96,32,0.013059200346469879
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,96,2,0.004995200037956238
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,96,16,0.0108255997300148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,96,1,0.004412800073623657
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,128,128,0.008796799927949905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,96,8,0.008988799899816513
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,96,2,0.007820799946784973
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,96,4,0.00820479989051819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,128,32,0.0051392000168561935
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,128,64,0.00634239986538887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,96,1,0.007628799974918365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,128,16,0.005385600030422211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,128,128,0.03648000061511993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,128,8,0.0054848000407218935
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,128,4,0.004953600093722343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,128,32,0.014643199741840363
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,128,64,0.02184640020132065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,128,2,0.0044351998716592785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,128,1,0.004944000020623207
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,128,16,0.011059200018644333
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,128,8,0.01005759984254837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,160,128,0.010230399668216705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,160,64,0.0065600000321865085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,128,4,0.008553600311279297
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,160,128,0.0470880001783371
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,128,2,0.00851840004324913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,160,16,0.005302400141954422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,128,1,0.008054400235414505
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,160,32,0.005385600030422211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,160,8,0.004931199923157692
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,160,4,0.004860800132155419
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,160,2,0.004294399917125702
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,160,1,0.0048351999372243885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,160,8,0.010502400249242783
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,192,128,0.01438400000333786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,160,64,0.025379198789596557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,160,4,0.008991999924182892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,160,16,0.012211199849843979
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,160,32,0.016652800142765045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,192,64,0.006960000097751618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,160,2,0.008297599852085114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,192,32,0.0056639999151229855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,192,64,0.028201600909233092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,160,1,0.007926400005817413
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,192,128,0.05187199711799621
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,192,16,0.005004800111055374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,192,8,0.0045471999794244765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,192,8,0.010857599973678588
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,192,2,0.005353600159287453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,192,4,0.004495999962091446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,192,32,0.017936000227928163
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,192,16,0.012848000228404998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,256,128,0.018822400271892546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,192,1,0.0044351998716592785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,192,1,0.008025600016117096
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,192,4,0.00891520008444786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,256,64,0.007702399790287018
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,256,32,0.005852799862623215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,192,2,0.008611200004816055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,256,16,0.0051552001386880875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,256,8,0.011059200018644333
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,256,128,0.06353920102119445
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,256,8,0.004892800003290176
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,256,4,0.004742399975657463
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,256,64,0.03355199992656708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,256,2,0.005033599957823753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,256,4,0.009801600128412247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,256,32,0.020502400398254395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,256,16,0.014217600226402283
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,320,128,0.07913600206375122
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,256,1,0.004454400017857551
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,320,128,0.021609599888324737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,320,64,0.008566399663686752
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,256,1,0.008294399827718735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,320,32,0.006883200258016586
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,320,16,0.005260799825191498
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,256,2,0.008687999844551087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,320,8,0.004979199916124344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,320,64,0.04441600143909454
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,320,4,0.004620800167322159
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,320,4,0.010224000364542008
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,320,2,0.008876799792051315
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,320,2,0.004502400010824204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,320,1,0.004502400010824204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,320,1,0.008044800162315369
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,384,128,0.024579200148582458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,320,16,0.01621440052986145
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,320,32,0.024028800427913666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,320,8,0.011699199676513672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,384,64,0.009433600306510925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,384,16,0.005503999814391136
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,384,32,0.006611199676990509
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,384,64,0.0499103993177414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,384,128,0.09099839925765991
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,384,8,0.005126399919390678
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,384,4,0.0049727998673915865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,384,4,0.010598400235176086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,384,2,0.004556800052523613
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,384,32,0.026425600051879883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,384,1,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,384,2,0.008841600269079208
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,384,16,0.017811200022697447
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,384,8,0.012464000284671784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,384,1,0.004502400010824204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,512,64,0.06196799874305725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,512,128,0.03057279884815216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,512,32,0.007996799796819687
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,512,128,0.11799999475479125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,512,32,0.03246079981327057
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,512,64,0.014959999918937683
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,512,16,0.00562559999525547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,512,8,0.005033599957823753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,512,4,0.004975999891757965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,512,2,0.0051807999610900875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,512,2,0.00966079980134964
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,512,1,0.0044895999133586885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,512,1,0.008252800256013871
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,512,16,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,768,128,0.1703968048095703
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,768,128,0.04164479970932007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,512,8,0.013833600282669067
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,768,32,0.009414400160312652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,768,64,0.08930240273475647
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,512,4,0.011046399921178817
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,768,64,0.022627200186252593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,768,16,0.006470400094985962
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,768,8,0.005500800162553787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,768,2,0.01058880016207695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,768,4,0.0048640001565217975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,768,2,0.004572800174355507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,768,32,0.04854399859905243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,768,1,0.0044064000248909
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1024,128,0.22377920150756836
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1024,128,0.0530784010887146
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1024,64,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,768,1,0.009008000046014786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,768,16,0.026393601298332216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,768,8,0.01738879978656769
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,768,4,0.012243200093507767
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1024,32,0.011430399864912033
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1024,8,0.01995519995689392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1024,64,0.11560959815979004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1024,16,0.007644800096750259
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1024,8,0.005881600081920624
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1024,4,0.004953600093722343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1024,2,0.005158400163054466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1024,32,0.0608672022819519
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1024,16,0.031769600510597226
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1024,1,0.0046847999095916745
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1024,4,0.013702400028705597
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1536,128,0.07473919987678528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1536,64,0.040880000591278075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1024,2,0.010950399935245514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1536,32,0.021695999801158904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1536,32,0.08786240220069885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1536,128,0.33029439449310305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1536,16,0.009334400296211243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1536,64,0.16862399578094484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1024,1,0.009865599870681762
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1536,8,0.006415999680757523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1536,4,0.005318399891257286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1536,2,0.004921599850058555
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1536,16,0.04750399887561798
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,1536,1,0.005392000079154968
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2048,128,0.09735999703407287
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2048,128,0.43477439880371094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2048,64,0.05224639773368835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1536,1,0.010460799932479859
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2048,32,0.02836799919605255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1536,4,0.017100800573825837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1536,2,0.012095999717712403
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,1536,8,0.025804799795150758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2048,4,0.0057599999010562895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2048,16,0.010908800363540649
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2048,8,0.007676800340414047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2048,64,0.22225921154022216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2048,2,0.005241600051522255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2048,32,0.11462080478668213
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2048,16,0.060412800312042235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,2048,1,0.004732799902558327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2048,1,0.010815999656915664
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2048,8,0.03171519935131073
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,3072,128,0.14197440147399903
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2048,4,0.019596800208091736
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,3072,128,0.6467552185058594
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,3072,64,0.07359039783477783
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,3072,16,0.022361600399017335
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,2048,2,0.013612799346446991
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,3072,32,0.04034239947795868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,3072,4,0.006598400324583054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,3072,8,0.009440000355243682
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,3072,2,0.005427199974656105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,3072,32,0.1672287940979004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,3072,64,0.3287584066390991
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4096,128,0.18770560026168823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,3072,16,0.08752959966659546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,3072,8,0.04770239889621734
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,3072,1,0.004816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4096,128,0.8588607788085938
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4096,64,0.0953279972076416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4096,32,0.05124800205230713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4096,32,0.22129600048065184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,3072,4,0.02556479871273041
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,3072,2,0.01719360053539276
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4096,16,0.029020801186561584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,3072,1,0.012387199699878693
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4096,64,0.43472962379455565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4096,8,0.01128000020980835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4096,16,0.11444159746170043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4096,4,0.007644800096750259
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4096,2,0.005539200082421303
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,4096,1,0.004975999891757965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4096,8,0.06021760106086731
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,6144,128,0.28102080821990966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4096,4,0.031036800146102904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,6144,128,1.2745823860168457
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,6144,64,0.13858239650726317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,6144,32,0.32781119346618653
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4096,2,0.01977279931306839
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,6144,32,0.07205439805984497
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,6144,64,0.6459424018859863
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,4096,1,0.01366720050573349
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,6144,8,0.08708159923553467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,6144,16,0.03939839899539947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,6144,4,0.009244800359010697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,6144,16,0.16674879789352418
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,6144,8,0.02229759991168976
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,6144,1,0.005497600138187409
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,6144,2,0.006499200314283371
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8192,64,0.8603679656982421
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8192,128,0.37493760585784913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,6144,4,0.047443199157714847
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8192,64,0.18542720079421998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8192,128,1.6909824371337892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8192,32,0.0942367970943451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8192,32,0.4329535961151123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8192,16,0.05415999889373779
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8192,8,0.02691200077533722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8192,4,0.011062400043010711
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,6144,1,0.016966399550437928
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,6144,2,0.025551998615264894
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8192,2,0.007670400291681289
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8192,16,0.22098879814147948
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8192,1,0.019785599410533906
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8192,8,0.11443840265274048
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,8192,1,0.005798399820923806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8192,4,0.060031998157501223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,12288,128,0.565171194076538
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,12288,64,1.2722208023071289
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,12288,64,0.26569280624389646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,12288,128,2.5246400833129883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,8192,2,0.031241598725318908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,12288,32,0.1380895972251892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,12288,8,0.041254401206970215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,12288,16,0.07127360105514527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,12288,32,0.6444704055786132
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,12288,16,0.328656005859375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,12288,4,0.02197760045528412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,12288,8,0.16684160232543946
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,12288,1,0.006489600241184235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,12288,2,0.009116800129413604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16384,128,0.7530335903167724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16384,64,1.6993535995483398
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,12288,4,0.08663039803504943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16384,64,0.3738368034362793
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16384,128,3.3550880432128904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16384,32,0.17888640165328978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,12288,2,0.04679999947547912
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16384,16,0.09889280200004577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16384,8,0.05382400155067444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16384,4,0.02871679961681366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16384,4,0.11320320367813111
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16384,2,0.01093439981341362
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,12288,1,0.02542400062084198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16384,8,0.2222815990447998
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16384,16,0.43335680961608886
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16384,32,0.8577856063842774
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,20480,128,0.9214143753051758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,16384,1,0.007686399668455124
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,20480,32,1.0706975936889649
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,20480,32,0.23960959911346436
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16384,2,0.059721601009368894
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,20480,16,0.11403839588165283
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,20480,64,2.1058176040649412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,20480,64,0.46503682136535646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,16384,1,0.030806401371955873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,20480,16,0.5382719993591308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,20480,128,4.177817535400391
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,20480,8,0.06096000075340271
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,20480,8,0.27306880950927737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,20480,4,0.03234559893608093
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,20480,1,0.008495999872684479
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,20480,4,0.14027199745178223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,20480,2,0.0753440022468567
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,float16,20480,2,0.018848000466823576
TRTLLM,1.0.0rc3,NVIDIA H200,mla_gen_post,default,fp8,20480,1,0.04108799993991852
