framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,1,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,1,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,1,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,1,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,1,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,1,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,1,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,1,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,1,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,1,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,1,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,1,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,1,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,1,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,3,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,3,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,3,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,3,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,3,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,3,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,3,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,3,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,3,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,3,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,3,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,3,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,3,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,3,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,3,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,3,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,7,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,7,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,7,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,7,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,7,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,7,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,7,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,7,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,7,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,7,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,7,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,7,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,7,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,7,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,7,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,7,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,15,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,15,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,15,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,15,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,15,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,15,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,15,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,15,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,15,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,15,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,15,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,15,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,15,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,15,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,15,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,15,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,31,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,31,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,31,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,31,0.013546666751305262
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,31,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,31,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,31,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,31,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,31,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,31,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,31,0.012549333274364471
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,31,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,31,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,31,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,31,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,31,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,63,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,63,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,63,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,63,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,63,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,63,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,63,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,63,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,63,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,63,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,63,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,63,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,63,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,127,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,127,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,127,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,127,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,127,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,127,0.01246400053302447
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,127,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,127,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,127,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,127,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,127,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,127,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,127,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,127,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,127,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,127,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,255,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,255,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,255,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,255,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,255,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,255,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,255,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,255,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,255,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,255,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,255,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,255,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,255,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,255,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,255,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,511,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,511,0.013466666142145792
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,511,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,511,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,511,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,511,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,511,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,511,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,511,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,511,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,511,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,511,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,511,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,511,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,511,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,511,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,1023,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,1023,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,1023,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,1023,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,1023,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,1023,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,1023,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,1023,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,1023,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,1023,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,1023,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,1023,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,1023,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,1023,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,1023,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,1023,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,2047,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,2047,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,2047,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,2047,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,2047,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,2047,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,2047,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,2047,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,2047,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,2047,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,2047,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,2047,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,2047,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,2047,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,2047,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,2047,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,4095,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,4095,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,4095,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,4095,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,4095,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,4095,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,4095,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,4095,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,4095,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,4095,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,4095,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,4095,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,4095,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,4095,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,4095,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,4095,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,8191,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,8191,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,8191,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,8191,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,8191,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,8191,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,8191,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,8191,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,8191,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,8191,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,8191,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,8191,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,8191,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,8191,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,8191,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,8191,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,16383,0.03847466657559077
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,16383,0.028805332879225414
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,16383,0.023770667612552643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,16383,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,16383,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,16383,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,16383,0.02180800090233485
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,16383,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,16383,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,16383,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,16383,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,16383,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,16383,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,16383,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,16383,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,16383,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,32767,0.03495466709136963
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,32767,0.03618666778008143
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,32767,0.029690665503342945
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,32767,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,32767,0.028607999285062153
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,32767,0.028016000986099243
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,32767,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,32767,0.027664000789324444
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,32767,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,32767,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,32767,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,32767,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,32767,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,32767,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,32767,0.02473066747188568
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,32767,0.024501333634058636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,65535,0.03895466774702072
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,65535,0.0439573327700297
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,65535,0.04366933306058248
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,65535,0.03568000098069509
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,65535,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,65535,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,65535,0.03596800069014231
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,65535,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,65535,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,65535,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,65535,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,65535,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,65535,0.029663999875386555
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,65535,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,65535,0.0283146674434344
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,65535,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,131071,0.058058664202690125
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,131071,0.06233599781990051
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,131071,0.07082666456699371
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,131071,0.05436266462008158
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,131071,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,131071,0.05706666906674703
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,131071,0.056794668237368263
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,131071,0.05653333167235056
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,131071,0.04398400088151296
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,131071,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,131071,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,131071,0.03368533402681351
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,131071,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,131071,0.03648533423741659
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,131071,0.035173334181308746
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,131071,0.03688533355792364
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,1,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,1,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,1,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,1,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,1,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,1,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,1,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,1,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,1,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,1,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,1,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,1,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,1,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,1,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,1,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,1,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,3,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,3,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,3,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,3,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,3,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,3,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,3,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,3,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,3,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,3,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,3,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,3,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,3,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,3,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,3,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,3,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,7,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,7,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,7,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,7,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,7,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,7,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,7,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,7,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,7,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,7,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,7,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,7,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,7,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,7,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,7,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,7,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,15,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,15,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,15,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,15,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,15,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,15,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,15,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,15,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,15,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,15,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,15,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,15,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,15,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,15,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,15,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,15,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,31,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,31,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,31,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,31,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,31,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,31,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,31,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,31,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,31,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,31,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,31,0.01257066677014033
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,31,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,31,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,31,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,31,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,31,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,63,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,63,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,63,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,63,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,63,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,63,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,63,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,63,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,63,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,63,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,63,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,63,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,127,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,127,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,127,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,127,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,127,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,127,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,127,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,127,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,127,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,127,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,127,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,127,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,127,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,127,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,127,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,127,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,255,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,255,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,255,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,255,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,255,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,255,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,255,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,255,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,255,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,255,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,255,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,255,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,255,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,255,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,255,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,255,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,511,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,511,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,511,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,511,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,511,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,511,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,511,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,511,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,511,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,511,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,511,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,511,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,511,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,511,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,511,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,511,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,1023,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,1023,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,1023,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,1023,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,1023,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,1023,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,1023,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,1023,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,1023,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,1023,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,1023,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,1023,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,1023,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,1023,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,1023,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,1023,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,2047,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,2047,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,2047,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,2047,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,2047,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,2047,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,2047,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,2047,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,2047,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,2047,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,2047,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,2047,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,2047,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,2047,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,2047,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,2047,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,4095,0.022810667753219604
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,4095,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,4095,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,4095,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,4095,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,4095,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,4095,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,4095,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,4095,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,4095,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,4095,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,4095,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,4095,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,4095,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,4095,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,4095,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,8191,0.03009066730737686
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,8191,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,8191,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,8191,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,8191,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,8191,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,8191,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,8191,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,8191,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,8191,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,8191,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,8191,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,8191,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,8191,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,8191,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,8191,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,16383,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,16383,0.03278400003910065
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,16383,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,16383,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,16383,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,16383,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,16383,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,16383,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,16383,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,16383,0.02470933397610982
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,16383,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,16383,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,16383,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,16383,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,16383,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,16383,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,32767,0.03182400017976761
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,32767,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,32767,0.03717333326737086
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,32767,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,32767,0.029717333614826202
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,32767,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,32767,0.02959999938805898
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,32767,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,32767,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,32767,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,32767,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,32767,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,32767,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,32767,0.025920001169045765
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,32767,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,32767,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,65535,0.051962668697039284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,65535,0.05885866781075796
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,65535,0.05673066775004069
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,65535,0.05227200190226237
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,65535,0.052149335543314614
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,65535,0.05373866856098175
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,65535,0.05179200073083242
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,65535,0.05301333467165629
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,65535,0.03555733213822047
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,65535,0.03791466603676478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,65535,0.034101332227389015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,65535,0.03170666595300039
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,65535,0.03046400099992752
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,65535,0.02996266633272171
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,65535,0.03070933371782303
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,65535,0.029711998999118805
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,131071,0.07509333391984303
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,131071,0.0853653351465861
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,131071,0.09166933099428813
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,131071,0.07727466523647308
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,131071,0.07503466804822286
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,131071,0.07737066845099132
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,131071,0.07689600189526875
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,131071,0.07574399809042613
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,131071,0.05205333232879639
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,131071,0.05653333167235056
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,131071,0.051882664362589516
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,131071,0.04800533254941305
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,131071,0.04691733419895172
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,131071,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,131071,0.04939733445644379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,131071,0.049738665421803795
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,1,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,1,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,1,0.011322667201360067
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,1,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,1,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,1,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,1,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,1,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,1,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,1,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,1,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,1,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,1,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,1,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,1,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,1,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,3,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,3,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,3,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,3,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,3,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,3,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,3,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,3,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,3,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,3,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,3,0.012437333663304647
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,3,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,3,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,7,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,7,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,7,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,7,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,7,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,7,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,7,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,7,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,7,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,7,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,7,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,7,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,7,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,7,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,7,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,7,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,15,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,15,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,15,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,15,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,15,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,15,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,15,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,15,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,15,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,15,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,15,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,15,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,15,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,15,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,15,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,31,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,31,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,31,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,31,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,31,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,31,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,31,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,31,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,31,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,31,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,31,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,31,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,31,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,31,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,31,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,31,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,63,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,63,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,63,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,63,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,63,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,63,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,63,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,63,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,63,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,63,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,63,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,63,0.012170666207869848
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,63,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,63,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,63,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,127,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,127,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,127,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,127,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,127,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,127,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,127,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,127,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,127,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,127,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,127,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,127,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,127,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,127,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,127,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,127,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,255,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,255,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,255,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,255,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,255,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,255,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,255,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,255,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,255,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,255,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,255,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,255,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,255,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,255,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,255,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,511,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,511,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,511,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,511,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,511,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,511,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,511,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,511,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,511,0.013301332791646322
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,511,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,511,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,511,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,511,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,511,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,511,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,511,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,1023,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,1023,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,1023,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,1023,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,1023,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,1023,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,1023,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,1023,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,1023,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,1023,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,1023,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,1023,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,1023,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,1023,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,1023,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,1023,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,2047,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,2047,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,2047,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,2047,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,2047,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,2047,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,2047,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,2047,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,2047,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,2047,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,2047,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,2047,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,2047,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,2047,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,2047,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,2047,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,4095,0.028650666276613872
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,4095,0.021615999440352123
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,4095,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,4095,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,4095,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,4095,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,4095,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,4095,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,4095,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,4095,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,4095,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,4095,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,4095,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,4095,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,4095,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,4095,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,8191,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,8191,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,8191,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,8191,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,8191,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,8191,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,8191,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,8191,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,8191,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,8191,0.022858666876951855
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,8191,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,8191,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,8191,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,8191,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,8191,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,8191,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,16383,0.029663999875386555
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,16383,0.03295466552178065
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,16383,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,16383,0.025642665723959606
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,16383,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,16383,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,16383,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,16383,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,16383,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,16383,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,16383,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,16383,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,16383,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,16383,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,16383,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,16383,0.022831998765468597
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,32767,0.04996266464392344
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,32767,0.05485333502292633
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,32767,0.05436799923578898
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,32767,0.04854933420817057
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,32767,0.047093331813812256
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,32767,0.04786666731039683
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,32767,0.047024001677831016
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,32767,0.047983999053637184
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,32767,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,32767,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,32767,0.0322773332397143
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,32767,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,32767,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,32767,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,32767,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,32767,0.026837334036827087
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,65535,0.07301866511503856
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,65535,0.07914133369922638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,65535,0.08923733234405518
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,65535,0.07156266768773396
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,65535,0.07111999889214833
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,65535,0.07138133545716603
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,65535,0.07016000151634216
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,65535,0.07097599903742473
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,65535,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,65535,0.0513973335425059
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,65535,0.05050133168697357
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,65535,0.043280000487963356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,65535,0.04218133290608724
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,65535,0.04317333300908407
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,65535,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,65535,0.04429866870244344
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,131071,0.12082667152086894
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,131071,0.132832000652949
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,131071,0.1560533344745636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,131071,0.12370133399963379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,131071,0.11896000305811565
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,131071,0.12106666962305705
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,131071,0.12009066343307495
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,131071,0.1202133297920227
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,131071,0.07197866837183635
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,131071,0.07437333464622498
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,131071,0.0811359981695811
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,131071,0.0664160003264745
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,131071,0.06452266871929169
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,131071,0.06655466556549072
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,131071,0.06403733293215434
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,131071,0.06507200002670288
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,1,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,1,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,1,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,1,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,1,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,1,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,1,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,1,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,1,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,1,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,1,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,1,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,1,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,1,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,3,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,3,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,3,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,3,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,3,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,3,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,3,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,3,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,3,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,3,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,3,0.011306667079528173
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,3,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,3,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,3,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,3,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,7,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,7,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,7,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,7,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,7,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,7,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,7,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,7,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,7,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,7,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,7,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,7,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,7,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,7,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,7,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,7,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,15,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,15,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,15,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,15,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,15,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,15,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,15,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,15,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,15,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,15,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,15,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,15,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,15,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,15,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,15,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,31,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,31,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,31,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,31,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,31,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,31,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,31,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,31,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,31,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,31,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,31,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,31,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,31,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,31,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,31,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,63,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,63,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,63,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,63,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,63,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,63,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,63,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,63,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,63,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,63,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,63,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,63,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,63,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,127,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,127,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,127,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,127,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,127,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,127,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,127,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,127,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,127,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,127,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,127,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,127,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,127,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,127,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,127,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,127,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,255,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,255,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,255,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,255,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,255,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,255,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,255,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,255,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,255,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,255,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,255,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,255,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,255,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,255,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,255,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,511,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,511,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,511,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,511,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,511,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,511,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,511,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,511,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,511,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,511,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,511,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,511,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,511,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,511,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,511,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,511,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,1023,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,1023,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,1023,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,1023,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,1023,0.013264000415802002
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,1023,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,1023,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,1023,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,1023,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,1023,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,1023,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,1023,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,1023,0.013264000415802002
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,1023,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,1023,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,1023,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,2047,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,2047,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,2047,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,2047,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,2047,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,2047,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,2047,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,2047,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,2047,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,2047,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,2047,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,2047,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,2047,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,2047,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,2047,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,2047,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,4095,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,4095,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,4095,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,4095,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,4095,0.02165866643190384
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,4095,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,4095,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,4095,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,4095,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,4095,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,4095,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,4095,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,4095,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,4095,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,4095,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,4095,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,8191,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,8191,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,8191,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,8191,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,8191,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,8191,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,8191,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,8191,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,8191,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,8191,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,8191,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,8191,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,8191,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,8191,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,8191,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,8191,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,16383,0.04894400139649709
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,16383,0.050928001602490745
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,16383,0.05806399881839752
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,16383,0.046240001916885376
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,16383,0.045738667249679565
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,16383,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,16383,0.044821331898371376
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,16383,0.04404266675313314
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,16383,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,16383,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,16383,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,16383,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,16383,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,16383,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,16383,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,16383,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,32767,0.07298133273919423
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,32767,0.07580266892910004
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,32767,0.09113599856694539
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,32767,0.0690719981988271
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,32767,0.06680533289909363
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,32767,0.06857066849867503
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,32767,0.0675786683956782
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,32767,0.067930668592453
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,32767,0.049925332268079124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,32767,0.048325334986050926
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,32767,0.05203733344872793
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,32767,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,32767,0.03998400022586187
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,32767,0.04009066770474116
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,32767,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,32767,0.03984533250331879
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,65535,0.12191999951998393
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,65535,0.12742933630943298
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,65535,0.16009066502253214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,65535,0.11906666556994121
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,65535,0.1164959967136383
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,65535,0.11703999837239583
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,65535,0.11683733264605205
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,65535,0.11571199695269267
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,65535,0.07221866647402446
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,65535,0.07074133555094402
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,65535,0.08327466746171315
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,65535,0.06302933394908905
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,65535,0.0621973325808843
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,65535,0.06205866734186808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,65535,0.062496001521746315
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,65535,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,131071,0.21890666087468466
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,131071,0.22798399130503336
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,131071,0.29758399724960327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,131071,0.22022400299708048
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,131071,0.21665600935618082
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,131071,0.21450666586558023
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,131071,0.2171786626180013
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,131071,0.21609600385030112
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,131071,0.12078400452931722
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,131071,0.11402133107185364
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,131071,0.14451733231544495
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,131071,0.10458133618036906
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,131071,0.10378666718800862
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,131071,0.10340266426404317
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,131071,0.10357866684595744
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,131071,0.10362133383750916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,1,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,1,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,1,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,1,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,1,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,1,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,1,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,1,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,1,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,1,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,1,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,1,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,1,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,1,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,1,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,1,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,3,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,3,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,3,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,3,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,3,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,3,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,3,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,3,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,3,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,3,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,3,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,3,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,3,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,3,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,7,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,7,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,7,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,7,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,7,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,7,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,7,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,7,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,7,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,7,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,7,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,7,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,7,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,7,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,7,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,7,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,15,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,15,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,15,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,15,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,15,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,15,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,15,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,15,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,15,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,15,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,15,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,15,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,15,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,15,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,15,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,31,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,31,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,31,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,31,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,31,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,31,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,31,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,31,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,31,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,31,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,31,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,31,0.01246400053302447
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,31,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,31,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,31,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,63,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,63,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,63,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,63,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,63,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,63,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,63,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,63,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,63,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,63,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,63,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,63,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,63,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,63,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,63,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,127,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,127,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,127,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,1,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,127,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,127,0.013381333400805792
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,127,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,127,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,127,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,127,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,127,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,127,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,127,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,127,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,127,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,127,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,127,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,255,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,255,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,255,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,255,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,255,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,255,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,255,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,255,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,255,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,255,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,255,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,255,0.012549333274364471
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,255,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,255,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,255,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,255,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,511,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,511,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,511,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,511,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,511,0.013370666652917862
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,511,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,511,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,511,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,511,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,511,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,511,0.014352000008026758
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,511,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,511,0.013461332768201828
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,511,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,511,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,511,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,1023,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,1023,0.019509332875410717
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,1023,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,1023,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,1023,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,1023,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,1023,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,1023,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,1023,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,1023,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,1023,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,1023,0.013466666142145792
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,1023,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,1023,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,1023,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,1023,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,2047,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,2047,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,2047,0.02081599955757459
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,2047,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,2047,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,2047,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,2047,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,2047,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,2047,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,2047,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,2047,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,2047,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,2047,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,2047,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,2047,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,2047,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,4095,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,4095,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,4095,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,4095,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,4095,0.022800001005331676
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,4095,0.02170666555563609
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,4095,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,4095,0.025850666066010792
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,4095,0.023498666783173878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,63,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,4095,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,4095,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,4095,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,4095,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,4095,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,4095,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,8191,0.04878933231035868
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,8191,0.052341332038243614
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,8191,0.054474666714668274
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,8191,0.046223998069763184
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,8191,0.04410133262475332
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,8191,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,8191,0.0429066667954127
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,8191,0.04389866689840952
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,8191,0.031898667414983116
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,8191,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,8191,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,8191,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,8191,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,8191,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,8191,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,8191,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,16383,0.07692266503969829
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,16383,0.07635733485221863
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,16383,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,16383,0.09378666679064433
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,16383,0.06866133213043213
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,16383,0.07067733506361644
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,16383,0.06823466718196869
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,16383,0.06820799907048543
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,16383,0.04967466493447622
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,16383,0.04744533201058706
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,16383,0.05142933130264282
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,16383,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,16383,0.04029333343108495
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,16383,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,16383,0.0393653338154157
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,16383,0.03937066594759623
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,32767,0.12823999921480814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,32767,0.12874133388201395
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,32767,0.1689173380533854
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,32767,0.12250133355458577
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,32767,0.11970667044321696
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,32767,0.118367999792099
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,32767,0.11958932876586914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,32767,0.12380266189575195
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,32767,0.07515733440717061
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,32767,0.07168533404668172
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,32767,0.086709330479304
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,32767,0.06229866544405619
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,32767,0.06226666768391927
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,32767,0.060965334375699363
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,32767,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,32767,0.06072533130645752
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,65535,0.23127466440200806
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,65535,0.23057599862416586
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,65535,0.3218933343887329
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,65535,0.21810666720072427
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,65535,0.2150986591974894
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,65535,0.21508800983428955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,65535,0.21730132897694907
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,65535,0.21625065803527832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,65535,0.12774399916330972
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,65535,0.1165173351764679
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,65535,0.1557866632938385
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,65535,0.10594133536020915
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,65535,0.10523733496665955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,65535,0.10503466924031575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,65535,0.10448533296585083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,65535,0.10353066523869832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,131071,0.43665067354838055
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,131071,0.43483201662699383
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,131071,0.6227200031280518
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,131071,0.41659732659657794
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,131071,0.40810132026672363
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,131071,0.4094293514887492
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,131071,0.4038453499476115
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,131071,0.4113066593805949
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,131071,0.22856533527374268
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,131071,0.20745599269866943
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,131071,0.29236799478530884
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,131071,0.18890666961669922
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,131071,0.18780799706776938
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,131071,0.18811732530593872
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,131071,0.1861600081125895
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,131071,0.18571199973424277
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,1,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,1,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,1,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,1,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,1,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,1,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,1,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,1,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,1,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,1,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,1,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,1,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,1,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,1,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,1,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,3,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,3,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,3,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,3,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,3,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,3,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,3,0.01331199953953425
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,3,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,3,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,3,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,3,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,3,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,3,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,3,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,3,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,3,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,7,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,7,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,7,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,7,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,7,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,7,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,7,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,7,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,7,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,7,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,7,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,7,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,7,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,7,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,7,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,7,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,15,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,15,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,15,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,15,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,15,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,15,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,15,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,15,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,15,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,15,0.013429333766301474
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,15,0.013514666507641474
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,15,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,15,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,15,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,15,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,15,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,31,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,31,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,31,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,31,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,31,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,31,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,31,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,31,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,31,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,31,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,31,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,31,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,31,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,31,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,31,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,31,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,63,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,63,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,63,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,63,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,63,0.0136266661187013
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,63,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,63,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,63,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,63,0.01573866605758667
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,63,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,63,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,63,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,63,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,63,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,63,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,63,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,127,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,127,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,127,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,127,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,127,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,127,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,127,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,127,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,127,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,127,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,127,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,127,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,127,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,127,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,127,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,127,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,255,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,255,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,255,0.013434667140245438
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,255,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,255,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,255,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,255,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,255,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,255,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,255,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,255,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,255,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,255,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,255,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,255,0.012416000167528788
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,255,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,511,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,511,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,511,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,511,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,511,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,511,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,511,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,511,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,511,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,511,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,511,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,511,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,511,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,511,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,511,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,511,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,1023,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,1023,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,1023,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,1023,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,1023,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,1023,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,1023,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,1023,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,1023,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,1023,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,1023,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,1023,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,1023,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,1023,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,1023,0.013274667163689932
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,1023,0.013338666409254074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,2047,0.0296426663796107
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,2047,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,2047,0.031845333675543465
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,2047,0.021589333812395733
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,2047,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,2047,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,2047,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,2047,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,2047,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,2047,0.02276266614596049
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,2047,0.021583999196688335
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,2047,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,2047,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,2047,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,2047,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,2047,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,4095,0.051728000243504844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,4095,0.05146133402983347
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,4095,0.056133334835370384
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,4095,0.04188266893227895
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,4095,0.04185600082079569
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,4095,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,4095,0.041850666205088295
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,4095,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,4095,0.03404266635576884
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,4095,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,4095,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,4095,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,4095,0.01951466624935468
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,4095,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,4095,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,4095,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,8191,0.07726933558781941
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,8191,0.0755680004755656
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,8191,0.09402666489283244
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,8191,0.06821866830190022
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,8191,0.06762666503588359
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,8191,0.06651733318964641
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,8191,0.06660800178845723
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,8191,0.06638933221499126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,8191,0.05417066812515259
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,8191,0.04754666487375895
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,8191,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,8191,0.03865066667397817
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,8191,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,8191,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,8191,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,8191,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,16383,0.1308746635913849
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,16383,0.13133333126703897
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,16383,0.17079466581344604
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,16383,0.1202293336391449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,16383,0.11776000261306763
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,16383,0.11663466691970825
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,16383,0.11972799897193909
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,16383,0.11682666341463725
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,16383,0.07892266909281413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,16383,0.07355199754238129
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,16383,0.08827733000119527
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,16383,0.06161599854628245
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,16383,0.059631998340288796
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,16383,0.06020799775918325
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,16383,0.059664001067479454
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,16383,0.059248000383377075
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,32767,0.23469332853953043
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,32767,0.23564799626668295
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,32767,0.32067734003067017
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,32767,0.22059732675552368
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,32767,0.21804799636205038
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,32767,0.21657600005467734
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,32767,0.21522132555643717
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,32767,0.21769599119822183
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,32767,0.13011200229326883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,32767,0.12198932965596516
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,32767,0.15744533141454062
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,32767,0.10674666364987691
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,32767,0.10687999924023946
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,32767,0.10544533530871074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,32767,0.10576533277829488
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,32767,0.1051093339920044
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,65535,0.44139734903971356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,65535,0.43856533368428546
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,65535,0.6266613403956095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,65535,0.4233866532643636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,65535,0.4157866636912028
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,65535,0.41713066895802814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,65535,0.41516268253326416
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,65535,0.4158986806869507
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,65535,0.23332266012827554
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,65535,0.21755733092625937
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,65535,0.2934880057970683
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,65535,0.19697066148122153
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,65535,0.19367466370264688
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,65535,0.19261332352956137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,65535,0.1934666633605957
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,65535,0.19368000825246176
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,131071,0.8541653156280518
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,131071,0.8535253206888834
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,131071,1.2273653348286946
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,131071,0.8269866307576498
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,131071,0.8118506272633871
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,131071,0.8112053076426188
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,131071,0.8067573706309
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,131071,0.8095520337422689
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,131071,0.43721067905426025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,131071,0.4090133508046468
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,131071,0.5696426630020142
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,131071,0.369920015335083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,131071,0.36883731683095294
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,131071,0.37030935287475586
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,131071,0.3667999903361003
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,131071,0.37084798018137616
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,1,0.01639466608564059
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,1,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,1,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,1,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,1,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,1,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,1,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,1,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,1,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,1,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,1,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,1,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,1,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,1,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,1,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,1,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,3,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,3,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,3,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,3,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,3,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,3,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,3,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,3,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,3,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,3,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,3,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,3,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,3,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,3,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,3,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,3,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,7,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,7,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,7,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,7,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,7,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,7,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,7,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,7,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,7,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,7,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,7,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,7,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,7,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,7,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,7,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,7,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,15,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,15,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,15,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,15,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,15,0.01370666672786077
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,15,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,15,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,15,0.013994666437307993
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,15,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,15,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,15,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,15,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,15,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,15,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,15,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,15,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,31,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,31,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,31,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,31,0.014629332969586054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,31,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,31,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,31,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,31,0.01403733342885971
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,31,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,31,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,31,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,31,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,31,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,31,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,31,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,31,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,63,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,63,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,63,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,63,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,63,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,63,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,63,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,63,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,63,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,63,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,63,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,63,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,63,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,63,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,63,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,63,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,127,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,127,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,127,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,127,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,127,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,127,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,127,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,127,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,127,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,127,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,127,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,127,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,127,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,127,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,127,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,127,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,255,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,255,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,255,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,255,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,255,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,255,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,255,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,255,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,255,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,255,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,255,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,255,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,255,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,255,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,255,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,255,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,511,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,511,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,511,0.018768000106016796
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,511,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,511,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,511,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,511,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,511,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,511,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,511,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,511,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,511,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,511,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,511,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,511,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,511,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,1023,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,1023,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,1023,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,1023,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,1023,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,1023,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,1023,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,1023,0.019434666881958645
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,1023,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,1023,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,1023,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,1023,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,1023,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,1023,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,1023,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,1023,0.016650666793187458
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,2047,0.04861866434415182
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,2047,0.052095999320348106
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,2047,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,2047,0.04295999805132548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,2047,0.04189866781234741
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,2047,0.04212800165017446
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,2047,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,2047,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,2047,0.029872000217437744
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,2047,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,2047,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,2047,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,2047,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,2047,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,2047,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,2047,0.019658666104078293
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,4095,0.0727040022611618
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,4095,0.08236266672611237
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,4095,0.09297066926956177
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,4095,0.07127466797828674
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,4095,0.06784533460934956
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,4095,0.06702933212121327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,4095,0.06862399975458781
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,4095,0.06554666658242543
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,4095,0.04886400202910105
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,4095,0.052015999952952065
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,4095,0.05021866659323374
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,4095,0.03990400085846583
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,4095,0.037802666425704956
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,4095,0.03804266701141993
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,4095,0.03786666691303253
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,4095,0.03763733307520548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,8191,0.12495999534924825
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,8191,0.13296533624331155
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,8191,0.16845333576202393
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,8191,0.12095999717712402
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,8191,0.11718400319417317
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,8191,0.11912000179290771
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,8191,0.11597333351771037
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,8191,0.11750400066375732
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,8191,0.07669866581757863
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,8191,0.07640000184377034
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,8191,0.08686400453249614
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,8191,0.06314133107662201
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,8191,0.06065600117047628
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,8191,0.059792002042134605
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,8191,0.059445331494013466
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,8191,0.05932266513506571
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,16383,0.230730672677358
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,16383,0.23736000061035156
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,16383,0.31829333305358887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,16383,0.22360533475875854
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,16383,0.21614933013916016
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,16383,0.22033600012461343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,16383,0.21796266237894693
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,16383,0.21762667099634805
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,16383,0.12628799676895142
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,16383,0.12344533205032349
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,16383,0.154639999071757
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,16383,0.10791466633478801
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,16383,0.1050879955291748
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,16383,0.10622400045394897
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,16383,0.10383466879526775
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,16383,0.10494400064150493
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,32767,0.4365760087966919
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,32767,0.4429759979248047
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,32767,0.6216053167978922
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,32767,0.4253600041071574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,32767,0.4153439998626709
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,32767,0.415333350499471
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,32767,0.41539200146993
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,32767,0.4151680072148641
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,32767,0.22989867130915323
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,32767,0.21965332825978598
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,32767,0.2935946583747864
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,32767,0.19592533508936563
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,32767,0.19399466117223105
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,32767,0.1935466726620992
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,32767,0.1932106614112854
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,32767,0.19202667474746704
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,65535,0.8526346683502197
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,65535,0.8650186856587728
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,65535,1.2243626912434895
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,65535,0.8281813462575277
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,65535,0.8134240309397379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,65535,0.813221295674642
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,65535,0.8074186642964681
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,65535,0.8143413066864014
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,65535,0.4337013165156047
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,65535,0.40828267733256024
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,65535,0.5688586632410685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,65535,0.3709973494211833
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,65535,0.3683253526687622
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,65535,0.36930668354034424
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,65535,0.3666079839070638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,65535,0.36848000685373944
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,131071,1.674224058787028
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,131071,1.6682666142781575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,131071,2.416170597076416
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,131071,1.6293387413024902
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,131071,1.5894826253255208
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,131071,1.596666653951009
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,131071,1.5922667185465496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,131071,1.604698657989502
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,131071,0.8415413697560629
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,131071,0.787168025970459
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,131071,1.1199733416239421
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,131071,0.7219733397165934
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,131071,0.7172640164693197
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,131071,0.7244533697764078
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,131071,0.7179626623789469
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,131071,0.7190026442209879
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,1,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,1,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,1,0.020448000480731327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,1,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,1,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,1,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,1,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,1,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,1,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,1,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,1,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,1,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,1,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,1,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,1,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,1,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,3,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,3,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,3,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,3,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,3,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,3,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,3,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,3,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,3,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,3,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,3,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,3,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,3,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,3,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,3,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,3,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,7,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,7,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,7,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,7,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,7,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,7,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,7,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,7,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,7,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,7,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,7,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,7,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,7,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,7,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,7,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,7,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,15,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,15,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,15,0.020207999895016353
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,15,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,15,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,15,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,15,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,15,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,15,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,15,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,15,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,15,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,15,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,15,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,15,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,15,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,31,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,31,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,31,0.020448000480731327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,31,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,31,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,31,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,31,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,31,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,31,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,31,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,31,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,31,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,31,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,31,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,31,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,31,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,63,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,63,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,63,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,63,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,63,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,63,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,63,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,63,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,63,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,63,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,63,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,63,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,63,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,63,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,63,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,63,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,127,0.020741333564122517
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,127,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,127,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,127,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,127,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,127,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,127,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,127,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,127,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,127,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,127,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,127,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,127,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,127,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,127,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,127,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,255,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,255,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,255,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,255,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,255,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,255,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,255,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,255,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,255,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,255,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,255,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,255,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,255,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,255,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,255,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,255,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,1,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,511,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,511,0.023599999646345775
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,511,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,511,0.020037333170572918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,511,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,511,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,511,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,511,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,511,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,511,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,511,0.031680000325044
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,511,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,511,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,511,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,511,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,1023,0.05030933519204458
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,1023,0.047872001926104225
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,1023,0.055957332253456116
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,1023,0.043680002291997276
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,1023,0.041696002086003624
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,1023,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,1023,0.0421066681543986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,1023,0.041706666350364685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,1023,0.04238399863243103
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,1023,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,1023,0.03408000121514002
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,1023,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,1023,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,1023,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,1023,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,1023,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,2047,0.07789333164691925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,2047,0.07411199808120728
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,2047,0.09547733267148335
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,2047,0.06880533198515575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,2047,0.06644266843795776
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,2047,0.06656533479690552
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,2047,0.06518400212128957
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,2047,0.06541866560777028
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,2047,0.05726933479309082
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,2047,0.04799999793370565
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,2047,0.0536106675863266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,2047,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,2047,0.03761066744724909
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,2047,0.03706666578849157
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,2047,0.03794133414824804
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,2047,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,4095,0.1325440009435018
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,4095,0.12635733683904013
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,4095,0.17083199818929037
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,4095,0.11904000242551167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,4095,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,4095,0.11894933382670085
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,4095,0.11628799637158711
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,4095,0.11692800124486287
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,4095,0.11555733283360799
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,4095,0.08469333251317342
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,4095,0.07055466870466869
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,4095,0.08899199962615967
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,4095,0.062261333068211876
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,4095,0.06069866816202799
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,4095,0.060005332032839455
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,4095,0.058575997749964394
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,8191,0.2346186637878418
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,8191,0.23109867175420126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,8191,0.3226933280626933
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,8191,0.2238933245340983
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,8191,0.21799999475479126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,8191,0.21758399407068887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,8191,0.219050665696462
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,8191,0.2186773419380188
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,8191,0.13646399974822998
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,8191,0.11981866757074992
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,8191,0.1583093305428823
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,8191,0.10672000050544739
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,8191,0.10517866412798564
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,8191,0.10366933544476827
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,8191,0.10338667035102844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,8191,0.1032426655292511
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,16383,0.44525333245595294
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,16383,0.43623467286427814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,16383,0.6244746843973795
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,16383,0.41972800095876056
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,16383,0.41617600123087567
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,16383,0.41602667172749835
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,16383,0.42050135135650635
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,16383,0.4158506790796916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,16383,0.2390399972597758
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,16383,0.21705599625905356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,16383,0.29526933034261066
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,16383,0.19593065977096558
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,16383,0.1930453379948934
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,16383,0.19509865840276083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,16383,0.19182399908701578
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,16383,0.19243200620015463
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,1,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,32767,0.8604533672332764
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,32767,0.8420586585998535
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,32767,1.2634720007578533
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,32767,0.822655995686849
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,32767,0.8105599880218506
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,32767,0.8089173634847006
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,32767,0.8193706671396891
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,32767,0.8145866394042969
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,32767,0.4458453257878621
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,32767,0.4058133363723755
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,32767,0.5713119904200236
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,32767,0.37165331840515137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,32767,0.36853333314259845
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,32767,0.36868266264597577
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,32767,0.369706670443217
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,32767,0.36741332213083905
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,65535,1.69160000483195
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,65535,1.655098597208659
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,65535,2.555354595184326
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,65535,1.6093279520670574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,65535,1.5956106185913086
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,65535,1.6013174057006836
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,65535,1.6080427169799805
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,65535,0.8568320274353027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,65535,0.7813226381937662
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,65535,1.1374560197194417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,65535,0.7191253503163656
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,65535,0.7185920079549154
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,65535,0.7167627016703287
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,65535,0.7177813053131104
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,65535,0.7176106770833334
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,131071,3.3427947362264
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,131071,3.290986696879069
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,131071,5.094229380289714
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,131071,3.198666572570801
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,131071,3.179093360900879
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,131071,3.1831893920898438
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,131071,3.17299747467041
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,131071,3.181119918823242
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,131071,1.679418722788493
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,131071,1.5420479774475098
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,131071,2.2962986628214517
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,131071,1.4206132888793945
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,131071,1.4173119862874348
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,131071,1.4172426859537761
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,131071,1.4217599232991536
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,131071,1.4183573722839355
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,1,0.030293333033720653
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,1,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,1,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,1,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,1,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,1,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,1,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,1,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,1,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,1,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,1,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,1,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,1,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,1,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,1,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,1,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,3,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,3,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,3,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,3,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,3,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,3,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,3,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,3,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,3,0.0447626660267512
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,3,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,3,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,3,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,3,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,3,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,3,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,3,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,7,0.029685333371162415
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,7,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,7,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,7,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,7,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,7,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,7,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,7,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,7,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,7,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,7,0.025957333544890087
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,7,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,7,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,7,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,7,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,7,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,15,0.030949334303538006
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,15,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,15,0.029722665747006733
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,15,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,15,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,15,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,15,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,15,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,15,0.044026667873064675
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,15,0.025578667720158894
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,15,0.02622933437426885
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,15,0.018090666582187016
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,15,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,15,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,15,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,15,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,31,0.030879999200503033
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,31,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,31,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,31,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,31,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,31,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,31,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,31,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,31,0.045456002155939736
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,31,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,31,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,31,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,31,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,31,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,31,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,31,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,63,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,63,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,63,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,63,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,63,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,63,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,63,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,63,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,63,0.04398933549722036
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,63,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,63,0.0283146674434344
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,63,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,63,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,63,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,63,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,63,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,127,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,127,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,127,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,127,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,127,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,127,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,127,0.01971199984351794
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,127,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,127,0.0455626646677653
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,127,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,127,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,127,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,127,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,127,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,127,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,127,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,255,0.04260799785455068
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,255,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,255,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,255,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,255,0.02027733375628789
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,255,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,255,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,255,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,255,0.05007466673851013
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,255,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,255,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,255,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,255,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,255,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,255,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,255,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,511,0.056330665946006775
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,511,0.05045866469542185
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,511,0.058575997749964394
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,511,0.04373333354791006
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,511,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,511,0.04168533285458883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,511,0.040565334260463715
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,511,0.040735999743143715
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,511,0.057205334305763245
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,511,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,511,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,511,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,511,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,511,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,511,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,511,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,1023,0.08481599887212117
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,1023,0.08046400050322215
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,1023,0.09559466441472371
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,1023,0.07053333520889282
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,1023,0.06832000116507213
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,1023,0.06791999936103821
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,1023,0.0674239993095398
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,1023,0.0666133314371109
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,1023,0.07160000006357829
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,1023,0.054666668176651
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,1023,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,1023,0.0400693342089653
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,1023,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,1023,0.037802666425704956
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,1023,0.03749333322048187
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,1023,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,2047,0.1390720009803772
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,2047,0.13221866885821024
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,2047,0.17242133617401123
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,2047,0.12142933408419292
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,2047,0.12178666392962138
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,2047,0.1183733344078064
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,2047,0.12055466572443645
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,2047,0.11866133411725362
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,2047,0.09931199749310811
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,2047,0.08222400148709615
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,2047,0.09434666236241658
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,2047,0.06542400022347768
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,2047,0.06213866670926412
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,2047,0.061333333452542625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,2047,0.06089066465695699
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,2047,0.05983466903368632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,4095,0.2433440089225769
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,4095,0.24086399873097739
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,4095,0.32555200656255084
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,4095,0.22657066583633423
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,4095,0.22269866863886514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,4095,0.2223680019378662
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,4095,0.22340800364812216
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,4095,0.2178986668586731
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,4095,0.15356799960136414
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,4095,0.13120533029238382
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,4095,0.16433067123095194
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,4095,0.11160533626874287
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,4095,0.10756799578666687
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,4095,0.1076693336168925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,4095,0.10609599947929382
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,4095,0.10578133662541707
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,8191,0.4537760019302368
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,8191,0.458682656288147
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,8191,0.6231093406677246
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,8191,0.4333386818567912
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,8191,0.4240586757659912
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,8191,0.4285706679026286
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,8191,0.4313226540883382
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,8191,0.42929601669311523
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,8191,0.2576640049616496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,8191,0.2283733288447062
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,8191,0.3006613254547119
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,8191,0.19934932390848795
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,8191,0.19625065724054971
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,8191,0.19586666425069174
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,8191,0.19724800189336142
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,8191,0.1958400011062622
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,16383,0.8714346885681152
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,16383,0.8682186603546143
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,16383,1.2477333545684814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,16383,0.8405973116556803
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,16383,0.8324426809946696
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,16383,0.8288373152414957
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,16383,0.8299573262532552
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,16383,0.8267733256022135
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,16383,0.4648746649424235
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,16383,0.420576016108195
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,16383,0.57532266775767
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,16383,0.3794613281885783
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,16383,0.3745386600494385
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,16383,0.3766080141067505
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,16383,0.374234676361084
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,16383,0.3719093402226766
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,32767,1.7079413731892903
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,32767,1.7034187316894531
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,32767,2.597439924875895
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,32767,1.6477600733439128
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,32767,1.6448052724202473
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,32767,1.6425600051879883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,32767,1.6472533543904622
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,32767,1.6375946998596191
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,32767,0.878000020980835
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,32767,0.8008320331573486
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,32767,1.132853349049886
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,32767,0.7319200038909912
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,32767,0.728874683380127
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,32767,0.7269386450449625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,32767,0.7289760112762451
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,32767,0.7286026477813721
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,65535,3.3854347864786782
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,65535,3.3765331904093423
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,65535,5.309381484985352
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,65535,3.271029472351074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,65535,3.2488800684611
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,65535,3.2425387700398765
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,65535,3.2526559829711914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,511,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,65535,3.248623847961426
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,65535,1.7070666948954265
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,65535,1.5644319852193196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,65535,1.4378879865010579
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,65535,2.279797395070394
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,65535,1.4352426528930664
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,65535,1.431711991628011
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,4095,0.05880533158779144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,1,0.050111999114354454
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,1,0.03587199995915095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,1,0.04563733438650767
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,1,0.029605334003766377
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,1,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,1,0.029130667448043823
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,1,0.028101332485675812
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,1,0.027776000400384266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,65535,1.4327999750773113
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,1,0.07758933305740356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,1,0.04786133269468943
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,1,0.04244266450405121
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,1,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,1,0.02347733328739802
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,1,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,1,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,1,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,3,0.05142933130264282
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,3,0.0367999995748202
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,3,0.04636266827583313
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,3,0.029893333713213604
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,3,0.02788266787926356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,3,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,3,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,3,0.027834666272004444
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,3,0.07909866670767467
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,3,0.048010667165120445
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,3,0.04345066845417023
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,3,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,3,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,3,0.022730665902296703
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,3,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,3,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,7,0.05118933320045471
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,7,0.03678400069475174
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,7,0.04771733283996582
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,7,0.02972800036271413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,7,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,7,0.027786667148272198
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,7,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,7,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,7,0.07852800190448761
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,7,0.047482664386431374
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,7,0.04207466542720795
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,7,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,7,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,7,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,7,0.02165866643190384
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,15,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,15,0.037621334195137024
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,15,0.04621866842110952
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,15,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,15,0.02796799937884013
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,15,0.027834666272004444
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,15,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,15,0.027952000498771667
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,15,0.07809600234031677
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,15,0.04799999793370565
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,15,0.04200533529122671
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,15,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,15,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,15,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,15,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,15,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,31,0.05186133086681366
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,31,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,31,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,31,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,31,0.027888000011444092
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,31,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,31,0.027610667049884796
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,31,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,31,0.07865599791208903
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,31,0.04797333478927612
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,31,0.04354666670163473
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,31,0.027658666173617046
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,31,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,31,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,31,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,31,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,63,0.05144000053405762
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,63,0.03746666759252548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,63,0.046426668763160706
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,63,0.02980799973011017
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,63,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,63,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,63,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,63,0.029359998802344005
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,63,0.07891199986139934
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,63,0.04806933303674062
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,63,0.04308266441027323
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,63,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,63,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,63,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,63,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,63,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,127,0.05423999826113383
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,127,0.043621331453323364
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,127,0.04984533290068308
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,127,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,127,0.030784000953038532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,127,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,127,0.029109333952267964
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,127,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,127,0.07891199986139934
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,127,0.05007466673851013
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,127,0.045653333266576133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,127,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,127,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,127,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,127,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,127,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,255,0.06729066868623097
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,255,0.05816000203291575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,255,0.05821333328882853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,255,0.047210668524106346
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,255,0.043391997615496315
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,255,0.04215466479460398
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,65535,1.6065012613932292
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,255,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,255,0.041984001795450844
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,255,0.08574400345484416
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,255,0.056799997886021934
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,255,0.04626133541266123
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,255,0.030133334298928578
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,255,0.02385066697994868
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,255,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,255,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,255,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,511,0.091648002465566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,511,0.08835200468699138
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,511,0.09158933162689209
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,511,0.07512533167997996
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,511,0.07071466743946075
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,511,0.07030400137106578
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,511,0.06932266553243001
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,511,0.06930133203665416
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,511,0.0962720016638438
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,511,0.06898133456707001
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,511,0.06447466711203258
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,511,0.04619200030962626
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,511,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,511,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,511,0.03878933439652125
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,511,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,1023,0.14215999841690063
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,1023,0.14205867052078247
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,1023,0.16316266854604086
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,1023,0.12532266974449158
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,1023,0.12004266182581584
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,1023,0.12160000205039978
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,1023,0.11740799744923909
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,1023,0.11844799915949504
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,1023,0.1218933363755544
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,1023,0.09538666407267253
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,1023,0.09528000156084697
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,1023,0.06975999971230824
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,1023,0.06322666505972545
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,1023,0.062080000837643944
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,1023,0.06258666515350342
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,1023,0.06178666651248932
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,2047,0.24150399367014566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,2047,0.2508266568183899
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,2047,0.2998666763305664
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,2047,0.23381332556406656
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,2047,0.2257279952367147
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,2047,0.22719999154408774
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,2047,0.2238933245340983
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,2047,0.22106132904688516
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,2047,0.17006933689117432
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,2047,0.16030399998029074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,2047,0.14612799882888794
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,2047,0.11626133322715759
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,2047,0.1104746659596761
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,2047,0.10812800129254659
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,2047,0.10789866248766582
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,2047,0.10609066486358643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,4095,0.4336586793263753
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,4095,0.4689653317133586
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,4095,0.5681653420130411
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,4095,0.44113067785898846
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,4095,0.4333813190460205
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,4095,0.4317813316980998
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,4095,0.4355733394622803
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,4095,0.43166931470235187
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,4095,0.2704426646232605
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,4095,0.2460106611251831
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,4095,0.2858240008354187
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,4095,0.2079520026842753
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,4095,0.20114666223526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,4095,0.1992853283882141
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,4095,0.19767467180887857
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,4095,0.1976906657218933
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,8191,0.8158933321634928
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,8191,0.9068106810251871
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,8191,0.8621599674224854
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,8191,1.1104213396708171
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,8191,0.8476160367329916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,8191,0.8452959855397543
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,8191,0.8449813524881998
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,8191,0.8519039948781332
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,8191,0.45844801266988117
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,8191,0.44099199771881104
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,8191,0.5289920171101888
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,8191,0.388314684232076
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,8191,0.3826346794764201
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,8191,0.38064531485239667
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,8191,0.37954668203989667
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,8191,0.37908267974853516
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,16383,1.5731306076049805
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,16383,1.7640053431193035
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,16383,1.6899627049763997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,16383,2.290442625681559
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,16383,1.6668906211853027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,16383,1.668874740600586
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,16383,1.6615254084269206
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,16383,0.831770658493042
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,16383,0.8271839618682861
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,16383,1.018117348353068
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,16383,0.7491626739501953
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,16383,0.7435893217722574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,16383,0.7405172983805338
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,16383,0.7392373085021973
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,16383,0.7399946848551432
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,32767,3.101994514465332
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,32767,3.499882698059082
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,32767,4.810533205668132
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,32767,3.374426523844401
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,32767,3.3190720876057944
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,32767,3.3386452992757163
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,32767,3.3182239532470703
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,32767,3.332159996032715
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,32767,1.575503985087077
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,32767,1.6026612917582195
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,32767,2.048346678415934
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,32767,1.4692959785461426
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,32767,1.4644427299499512
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,32767,1.4616533915201824
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,32767,1.4612800280253093
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,32767,1.4583412806193035
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,1,0.08653333783149719
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,1,0.06170133252938589
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,1,0.08230933547019958
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,1,0.0499893327554067
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,1,0.04541866481304169
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,1,0.04411733150482178
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,1,0.04366933306058248
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,1,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,1,0.1431839962800344
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,1,0.08313600222269694
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,1,0.07773333291212718
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,1,0.0440533310174942
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,1,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,1,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,1,0.03253866732120514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,1,0.03162666658560435
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,3,0.08460799853006999
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,3,0.06019733349482218
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,3,0.08285333216190338
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,3,0.05048533280690511
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,3,0.04561600089073181
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,3,0.04472533365090688
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,3,0.04533866544564565
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,3,0.044266665975252785
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,3,0.1443893313407898
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,3,0.08416000008583069
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,3,0.07650133470694225
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,3,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,3,0.033887999753157295
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,3,0.0315733328461647
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,3,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,3,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,7,0.08500799536705017
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,7,0.06246933341026306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,7,0.0817333310842514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,7,0.051301335295041404
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,7,0.04496533175309499
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,7,0.044639999667803444
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,7,0.04364266494909922
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,7,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,7,0.14407466848691305
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,7,0.08388266960779826
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,7,0.07698133091131847
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,7,0.04366933306058248
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,7,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,7,0.03368533402681351
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,7,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,7,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,15,0.08596799770991008
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,15,0.060640002290407814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,15,0.08171199758847554
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,15,0.050586665670077004
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,15,0.04456533491611481
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,15,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,15,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,15,0.04385599990685781
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,15,0.14240533113479614
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,15,0.08342400193214417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,15,0.07855999966462453
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,15,0.044031997521718345
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,15,0.03550933301448822
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,15,0.03342933456103007
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,15,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,15,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,31,0.0888266662756602
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,31,0.061306665341059365
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,31,0.08261866867542267
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,31,0.05141866703828176
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,31,0.04571199913819631
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,31,0.04437333345413208
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,31,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,31,0.04404800136884054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,31,0.14292266964912415
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,31,0.08310933411121368
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,31,0.07732800145943959
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,31,0.04394666850566864
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,31,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,31,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,31,0.03181333343187968
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,31,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,63,0.08494399984677632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,63,0.06048533320426941
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,63,0.08239466448624928
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,63,0.05100266635417938
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,63,0.0455626646677653
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,63,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,63,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,63,0.04389866689840952
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,63,0.14415466785430908
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,63,0.08450667063395183
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,63,0.07729599873224895
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,63,0.04364266494909922
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,63,0.03385066737731298
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,63,0.03190933416287104
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,63,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,63,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,127,0.09056533376375835
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,127,0.0684853345155716
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,127,0.0858133335908254
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,127,0.06029866635799408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,127,0.05417599777380625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,127,0.05400000015894572
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,127,0.05301866432030996
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,127,0.052832002441088356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,127,0.14337066809336343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,127,0.08501866459846497
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,127,0.07919466495513916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,127,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,127,0.03677333394686381
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,127,0.034117333590984344
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,127,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,127,0.03368533402681351
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,255,0.11251733700434367
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,255,0.09799999992052714
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,255,0.09780266880989075
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,255,0.07287999987602234
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,255,0.07001600166161855
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,255,0.0685280015071233
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,255,0.0680320014556249
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,255,0.06651199857393901
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,255,0.15380266308784485
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,255,0.09930133819580078
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,255,0.08079466720422109
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,255,0.05251200000445048
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,255,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,255,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,255,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,255,0.03841066608826319
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,511,0.16430399815241495
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,511,0.14689600467681885
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,511,0.16824533541997275
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,511,0.12309333682060242
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,511,0.11880532900492351
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,511,0.11849600076675415
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,511,0.11681600411732991
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,511,0.11641599734624226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,511,0.17693867286046347
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,511,0.1209493378798167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,511,0.11576533317565918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,511,0.07515199979146321
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,511,0.06618133187294006
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,511,0.06214400132497152
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,511,0.06235733131567637
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,511,0.06141866743564606
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,1023,0.26504000027974445
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,1023,0.2471733291943868
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,1023,0.30635732412338257
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,1023,0.2192373275756836
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,1023,0.2125493288040161
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,1023,0.2103253404299418
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,1023,0.20888533194859824
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,1023,0.20618667205174765
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,1023,0.22585066159566244
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,1023,0.1649066706498464
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,1023,0.17668267091115317
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,1023,0.11958400408426921
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,1023,0.1076639990011851
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,1023,0.10522133111953735
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,1023,0.10409599542617798
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,1023,0.10317867000897725
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,2047,0.4641226530075073
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,2047,0.45313068230946857
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,2047,0.5818399985631307
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,2047,0.42534931500752765
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,2047,0.4128906726837158
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,2047,0.408735990524292
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,2047,0.40770665804545086
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,2047,0.4039359887440999
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,2047,0.3250719904899597
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,2047,0.2600586613019307
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,2047,0.30508265892664593
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,2047,0.20631466309229532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,2047,0.19519466161727905
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,2047,0.19025067488352457
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,2047,0.18939733505249023
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,2047,0.1872319976488749
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,4095,0.8457280000050863
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,4095,0.8613920211791992
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,4095,1.112773338953654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,4095,0.8182773590087891
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,4095,0.8075946966807047
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,4095,0.7951947053273519
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,4095,0.7949173450469971
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,4095,0.7923040390014648
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,4095,0.5212479829788208
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,4095,0.43958401679992676
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,4095,0.5520319938659668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,4095,0.37601598103841144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,4095,0.36345068613688153
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,4095,0.35957332452138263
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,4095,0.35841600100199383
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,4095,0.35582399368286133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,8191,1.6078880627950032
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,65535,1.4352533022562664
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,8191,1.6519625981648762
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,8191,2.192922592163086
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,8191,1.5996425946553547
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,8191,1.5816267331441243
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,8191,1.5752746264139812
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,8191,1.5728747049967449
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,8191,1.5660266876220703
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,8191,0.8974933624267578
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,8191,0.7977120081583658
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,8191,1.037765343983968
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,8191,0.712773323059082
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,8191,0.7004160086313883
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,7,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,8191,0.6975893179575602
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,8191,0.6938400268554688
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,8191,0.6898293495178223
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,16383,3.131253242492676
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,16383,3.2640161514282227
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,16383,4.590799967447917
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,16383,3.148170789082845
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,16383,3.1269280115763345
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,16383,3.137989362080892
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,16383,3.097599983215332
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,16383,3.114762624104818
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,16383,1.642741362253825
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,16383,1.507146676381429
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,16383,1.3776480356852214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,16383,2.0097920099894204
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,16383,1.368127981821696
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,16383,1.3618399302164714
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,16383,1.3629066149393718
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,16383,1.3606826464335124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,16383,1.6760692596435547
