framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,1,0.013765333841244379
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,1,0.01621333385507266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,1,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,1,0.01328533391157786
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,1,0.011029332876205444
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,1,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,1,0.01211200033624967
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,1,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,1,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,1,0.012789333860079447
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,1,0.011850666254758835
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,1,0.0122079998254776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,1,0.014720000326633453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,1,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,3,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,3,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,3,0.013989333063364029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,3,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,3,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,3,0.012144000579913458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,3,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,3,0.013733333597580591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,3,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,3,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,3,0.012400000045696894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,3,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,7,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,3,0.013754667093356451
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,3,0.013429333766301474
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,7,0.01239466667175293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,7,0.01332266628742218
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,7,0.012389333297808966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,7,0.012389333297808966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,7,0.012357333054145178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,7,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,7,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,7,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,7,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,7,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,7,0.012810666114091873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,7,0.012432000289360682
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,15,0.013850666582584381
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,15,0.01357866699496905
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,15,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,15,0.014245333770910898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,15,0.01379199946920077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,15,0.012506666282812754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,15,0.0141546664138635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,15,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,15,0.01239466667175293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,15,0.013776000589132309
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,15,0.013882666826248169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,15,0.01381333296497663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,15,0.011034666250149408
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,15,0.014256000518798828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,31,0.01239466667175293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,31,0.012389333297808966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,31,0.012389333297808966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,31,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,31,0.011781333635250727
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,31,0.011776000261306763
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,31,0.013418667018413544
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,31,0.01239466667175293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,31,0.013786666095256805
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,31,0.013594667116800943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,31,0.013493333011865616
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,31,0.013861333330472311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,31,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,31,0.012144000579913458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,63,0.013999999811251959
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,63,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,63,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,63,0.015077333897352219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,63,0.013562666873137156
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,63,0.014165333161751429
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,63,0.012901333471139273
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,63,0.014277332772811254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,63,0.013957332819700241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,63,0.01423466702302297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,63,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,63,0.012149333953857422
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,63,0.011952000359694162
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,63,0.013642666240533194
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,127,0.013936000565687815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,127,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,127,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,127,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,127,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,127,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,127,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,127,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,127,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,127,0.010847999403874079
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,127,0.012389333297808966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,127,0.0138026662170887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,255,0.012400000045696894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,255,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,255,0.013738666971524557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,255,0.01110400011142095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,255,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,255,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,255,0.011786667009194693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,255,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,255,0.011893333246310553
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,255,0.013301332791646322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,255,0.012128000458081564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,255,0.012229333321253458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,255,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,511,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,511,0.0145066666106383
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,511,0.016469333320856094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,511,0.0129120002190272
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,511,0.014389333625634512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,511,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,511,0.014842666685581207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,511,0.014746667196353277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,511,0.016480000068744022
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,511,0.014773332824309668
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,511,0.01444799949725469
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,511,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,511,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,1023,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,1023,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,1023,0.015306666493415833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,1023,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,1023,0.014389333625634512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,1023,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,1023,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,1023,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,1023,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,1023,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,1023,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,1023,0.01444799949725469
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,1023,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,1023,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,2047,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,2047,0.0164533331990242
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,2047,0.01682666689157486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,2047,0.014837333311637243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,2047,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,2047,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,2047,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,2047,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,2047,0.014511999984582266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,2047,0.01603200038274129
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,2047,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,2047,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,4095,0.020768000433842342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,4095,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,4095,0.017317333569129307
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,4095,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,4095,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,4095,0.016458666572968166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,4095,0.02090666691462199
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,4095,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,4095,0.016885332763195038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,4095,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,4095,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,8191,0.021055998901526134
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,8191,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,8191,0.020762667059898376
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,8191,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,8191,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,8191,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,8191,0.024122667809327442
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,8191,0.02110933264096578
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,8191,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,8191,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,8191,0.02109866589307785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,16383,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,16383,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,16383,0.026922665536403656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,16383,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,16383,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,16383,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,16383,0.029279999434947968
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,16383,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,16383,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,16383,0.024906667570273083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,16383,0.026634665826956432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,32767,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,32767,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,32767,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,32767,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,32767,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,32767,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,32767,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,32767,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,32767,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,32767,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,65535,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,65535,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,65535,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,65535,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,65535,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,65535,0.04163199911514918
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,65535,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,65535,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,65535,0.0484746644894282
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,65535,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,65535,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,65535,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,65535,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,65535,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,1,131071,0.0675786683956782
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,131071,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,4,131071,0.06211199859778086
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,8,131071,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,16,131071,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,32,131071,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,64,131071,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,1,131071,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,2,131071,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,4,131071,0.06076266864935557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,8,131071,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,16,131071,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,32,131071,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,64,131071,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,1,0.014469332993030548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,1,0.013616000612576803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,1,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,1,0.01394133393963178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,1,0.013733333597580591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,1,0.013946666071812311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,1,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,1,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,1,0.012389333297808966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,1,0.012400000045696894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,1,0.014282666146755219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,1,0.014069333672523499
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,1,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,1,0.013807999591032663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,3,0.014181333283583323
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,3,0.01239466667175293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,3,0.013552000125249227
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,3,0.013882666826248169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,3,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,3,0.012538666526476542
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,3,0.013631999492645264
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,3,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,3,0.014096000542243322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,3,0.014805333067973455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,3,0.012805332740147909
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,3,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,3,0.012080000092585882
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,3,0.013754667093356451
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,7,0.014352000008026758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,7,0.013232000172138214
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,7,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,7,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,7,0.013616000612576803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,7,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,7,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,7,0.013557333499193192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,7,0.013482666263977686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,7,0.013744000345468521
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,7,0.012053333222866058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,7,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,15,0.014352000008026758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,15,0.013450667262077332
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,15,0.012458667159080505
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,15,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,15,0.012800000607967377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,15,0.013349333157142004
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,15,0.012223999947309494
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,15,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,15,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,15,0.01403733342885971
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,15,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,15,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,15,0.012746666868527731
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,15,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,31,0.013946666071812311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,31,0.01370666672786077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,31,0.013749333719412485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,31,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,31,0.013674666484196981
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,31,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,31,0.013317332913478216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,31,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,31,0.01421333352724711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,31,0.013888000200192133
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,31,0.013925333817799887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,31,0.013546666751305262
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,31,0.011546666423479715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,31,0.013434667140245438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,63,0.0143306665122509
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,63,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,63,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,63,0.0122079998254776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,63,0.01379199946920077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,2,7,0.012469333906968435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,63,0.012341332932313284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,63,0.012773333738247553
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,63,0.012506666282812754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,63,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,63,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,63,0.013440000514189402
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,63,0.012800000607967377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,63,0.013167999684810638
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,63,0.013957332819700241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,127,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,127,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,127,0.013584000368913015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,127,0.01379199946920077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,127,0.013610667238632837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,127,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,127,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,127,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,127,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,127,0.012383999923865
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,127,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,127,0.013951999445756277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,127,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,127,0.013477332890033722
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,255,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,255,0.013605333864688873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,255,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,255,0.012800000607967377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,255,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,255,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,255,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,255,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,255,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,255,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,255,0.013530666629473368
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,255,0.013301332791646322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,255,0.01341333364446958
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,255,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,511,0.015717333803574245
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,511,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,511,0.014442666123310724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,511,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,511,0.013999999811251959
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,511,0.013045333325862885
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,511,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,511,0.014218666901191076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,511,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,511,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,511,0.01320533330241839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,511,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,511,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,511,0.013872000078360239
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,1023,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,1023,0.014629332969586054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,1023,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,1023,0.014005333185195923
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,1023,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,1023,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,1023,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,1023,0.014965333044528961
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,1023,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,1023,0.01320533330241839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,1023,0.014453332871198654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,1023,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,1023,0.013482666263977686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,1023,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,2047,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,2047,0.017290666699409485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,2047,0.012901333471139273
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,2047,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,2047,0.014885333677132925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,2047,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,2047,0.014192000031471252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,2047,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,2047,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,2047,0.014767999450365702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,2047,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,2047,0.014165333161751429
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,2047,0.01301866645614306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,2047,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,4095,0.019039999693632126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,4095,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,4095,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,4095,0.018426666657129925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,4095,0.018863999595244724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,4095,0.02041600023706754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,4095,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,4095,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,4095,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,4095,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,4095,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,8191,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,8191,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,8191,0.021114667256673176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,8191,0.02149333308140437
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,8191,0.02109866589307785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,8191,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,8191,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,8191,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,8191,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,8191,0.02109866589307785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,8191,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,16383,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,16383,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,16383,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,16383,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,16383,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,16383,0.04164800047874451
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,16383,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,16383,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,16383,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,16383,0.03036266565322876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,16383,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,16383,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,16383,0.04028266668319702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,16383,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,32767,0.043696001172065735
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,32767,0.04127466678619385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,32767,0.05426133175690969
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,32767,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,32767,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,32767,0.05256533126036326
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,32767,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,32767,0.052560001611709595
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,32767,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,32767,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,32767,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,65535,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,65535,0.06451733410358429
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,65535,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,65535,0.07543999950091045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,65535,0.07986666758855183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,65535,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,65535,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,65535,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,65535,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,65535,0.06280000011126201
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,65535,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,65535,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,65535,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,65535,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,1,131071,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,2,131071,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,4,131071,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,8,131071,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,16,131071,0.10342933734258015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,32,131071,0.10342400272687276
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,64,131071,0.10205866893132527
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,131071,0.09899200002352397
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,1,131071,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,4,131071,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,8,131071,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,131071,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,32,131071,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,64,131071,0.1013759970664978
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,1,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,1,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,1,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,1,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,1,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,1,0.013674666484196981
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,1,0.013210666676362356
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,2,7,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,1,0.01714133347074191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,1,0.015237333873907724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,1,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,1,0.013594667116800943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,1,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,16,7,0.014149333039919535
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,1,0.014346666634082794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,3,0.013888000200192133
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,3,0.01404800017674764
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,3,0.013701333353916803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,3,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,3,0.014005333185195923
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,3,0.012400000045696894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,3,0.014453332871198654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,3,0.014229333649079004
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,3,0.01394133393963178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,3,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,3,0.014511999984582266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,3,0.013717333475748697
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,3,0.013898666948080063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,3,0.012229333321253458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,7,0.013199999928474426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,7,0.014549333602190018
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,7,0.01402666668097178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,7,0.013482666263977686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,7,0.014282666146755219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,7,0.014186666657527288
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,7,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,7,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,7,0.014181333283583323
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,7,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,7,0.013823999712864557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,7,0.01250133290886879
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,7,0.013440000514189402
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,15,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,15,0.014303999642531076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,15,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,15,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,15,0.01239466667175293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,15,0.01393066719174385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,15,0.013914667069911957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,15,0.012400000045696894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,15,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,15,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,15,0.012223999947309494
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,15,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,15,0.013370666652917862
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,15,0.013557333499193192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,31,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,31,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,31,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,31,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,31,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,31,0.014352000008026758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,31,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,31,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,31,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,31,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,63,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,63,0.012837332983811697
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,63,0.014064000298579534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,63,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,63,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,63,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,63,0.01360000049074491
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,63,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,63,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,63,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,63,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,63,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,63,0.013839999834696451
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,63,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,127,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,127,0.014271999398867289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,127,0.013712000101804733
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,127,0.012842666357755661
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,127,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,127,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,127,0.013301332791646322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,127,0.013983999689420065
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,127,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,127,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,127,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,127,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,127,0.012805332740147909
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,127,0.012357333054145178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,255,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,255,0.015189333508412043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,255,0.014394666999578476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,255,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,255,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,255,0.014250667144854864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,255,0.013776000589132309
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,255,0.013999999811251959
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,255,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,255,0.014175999909639359
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,255,0.013493333011865616
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,255,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,255,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,511,0.014629332969586054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,511,0.013359999905029932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,511,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,511,0.01320533330241839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,511,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,511,0.013381333400805792
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,511,0.014485333114862442
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,511,0.015077333897352219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,511,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,511,0.014463999619086584
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,511,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,511,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,511,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,511,0.014479999740918478
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,1023,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,1023,0.014901333798964819
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,1023,0.014581333845853806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,1023,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,1023,0.013221333424250284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,1023,0.01321600005030632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,1023,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,1023,0.014917333920796713
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,1023,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,1023,0.01422400027513504
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,1023,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,1023,0.013210666676362356
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,1023,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,1023,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,2047,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,2047,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,2047,0.01647466669480006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,2047,0.016282666474580765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,2047,0.016447999825080235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,2047,0.015482666591803232
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,2047,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,2047,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,2047,0.016970666746298473
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,2047,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,2047,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,2047,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,2047,0.015781333049138386
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,4095,0.025146665672461193
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,4095,0.020666666328907013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,4095,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,4095,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,4095,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,4095,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,4095,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,4095,0.020703999946514767
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,4095,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,4095,0.01868266612291336
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,4095,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,4095,0.017018667111794155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,4095,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,8191,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,8191,0.031040000418821972
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,8191,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,8191,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,8191,0.028336000939210255
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,8191,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,8191,0.02481066683928172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,8191,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,16383,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,16383,0.040618665516376495
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,16383,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,16383,0.03856533269087473
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,16383,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,7,0.01314666618903478
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,16383,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,16383,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,16383,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,16383,0.03857066730658213
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,16383,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,32767,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,32767,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,32767,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,32767,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,32767,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,32767,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,32767,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,32767,0.05494933327039083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,31,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,32767,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,32767,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,32767,0.05939733485380808
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,32767,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,32767,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,32767,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,65535,0.08224533498287201
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,65535,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,65535,0.07849599917729695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,65535,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,65535,0.08533333738644917
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,65535,0.08567466338475545
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,65535,0.08738133311271667
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,65535,0.08193066716194153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,65535,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,31,0.01258133351802826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,65535,0.08088533580303192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,65535,0.08499200145403545
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,65535,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,1,131071,0.13038399815559387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,65535,0.08601066470146179
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,2,131071,0.16332800189654031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,4,131071,0.12800000111262003
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,8,131071,0.13516799608866373
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,16,131071,0.13636266191800436
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,64,131071,0.1353333294391632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,32,131071,0.13397333025932312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,2,131071,0.16332800189654031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,1,131071,0.13106667002042136
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,4,131071,0.12800000111262003
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,8,131071,0.13516799608866373
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,131071,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,32,131071,0.1341973344484965
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,64,131071,0.13482667009035745
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,1,0.014394666999578476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,1,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,1,0.013130666067202887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,1,0.014159999787807465
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,1,0.013786666095256805
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,1,0.013749333719412485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,1,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,1,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,1,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,1,0.012826666235923767
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,1,0.013904000322024027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,1,0.013482666263977686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,1,0.014010666559139887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,1,0.014208000153303146
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,3,0.013877333452304205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,3,0.013951999445756277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,3,0.013850666582584381
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,3,0.014858666807413101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,3,0.013781332721312841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,3,0.01481066644191742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,3,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,3,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,3,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,3,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,3,0.014159999787807465
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,3,0.01421333352724711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,3,0.013482666263977686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,3,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,7,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,7,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,7,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,7,0.013770667215188345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,7,0.013855999956528345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,7,0.014021333307027817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,7,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,7,0.014965333044528961
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,7,0.01239466667175293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,7,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,7,0.014069333672523499
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,7,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,15,0.014933332800865173
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,15,0.013199999928474426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,15,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,15,0.013770667215188345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,15,0.013823999712864557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,15,0.014010666559139887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,15,0.0138026662170887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,15,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,15,0.01368533323208491
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,15,0.014042666802803675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,15,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,15,0.014122666170199713
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,15,0.013471999516089758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,15,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,31,0.014490666488806406
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,31,0.013712000101804733
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,31,0.01481066644191742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,31,0.014197333405415217
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,31,0.013829333086808523
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,31,0.016783999900023144
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,31,0.015082667271296183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,31,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,31,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,31,0.014853333433469137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,31,0.013882666826248169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,31,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,31,0.01498666654030482
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,63,0.0145066666106383
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,31,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,63,0.013477332890033722
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,63,0.014864000181357065
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,63,0.01414399966597557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,63,0.014474666366974512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,63,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,63,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,63,0.012223999947309494
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,63,0.013823999712864557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,63,0.014730667074521383
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,63,0.013503999759753546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,63,0.014064000298579534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,63,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,63,0.013573333621025085
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,127,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,127,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,127,0.013610667238632837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,127,0.014021333307027817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,127,0.013829333086808523
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,127,0.01422400027513504
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,127,0.01470400020480156
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,127,0.014159999787807465
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,127,0.013621332744757334
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,127,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,127,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,127,0.014015999933083853
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,127,0.013797332843144735
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,127,0.014159999787807465
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,255,0.014848000059525171
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,255,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,255,0.014378666877746582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,255,0.014458666245142618
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,255,0.013765333841244379
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,255,0.01441066712141037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,255,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,255,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,255,0.013797332843144735
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,255,0.012960000584522883
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,255,0.013728000223636627
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,255,0.013861333330472311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,255,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,255,0.013765333841244379
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,511,0.01509333277742068
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,511,0.015077333897352219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,511,0.014773332824309668
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,511,0.013493333011865616
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,511,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,511,0.014309333016475042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,511,0.015082667271296183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,511,0.013855999956528345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,511,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,511,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,511,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,511,0.01267733300725619
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,511,0.0129120002190272
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,511,0.014303999642531076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,1023,0.0183146670460701
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,1023,0.015247999380032221
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,1023,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,1023,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,1023,0.015072000523408255
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,1023,0.015247999380032221
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,1023,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,1023,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,1023,0.015429332852363586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,1023,0.014794666320085526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,1023,0.015872000406185787
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,1023,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,1023,0.01444799949725469
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,2047,0.019013332823912304
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,2047,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,2047,0.017125333348910015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,2047,0.016789333273967106
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,2047,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,2047,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,2047,0.02481599897146225
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,2047,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,2047,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,2047,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,2047,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,2047,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,2047,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,4095,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,4095,0.025194667279720306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,4095,0.02072000006834666
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,4095,0.023445333043734234
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,4095,0.023141334454218548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,4095,0.02277333289384842
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,4095,0.02309866746266683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,4095,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,4095,0.02685333291689555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,4095,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,4095,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,4095,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,8191,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,4095,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,8191,0.0365226666132609
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,8191,0.028922667105992634
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,8191,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,8191,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,8191,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,16383,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,16383,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,16383,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,16383,0.05086933573087057
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,16383,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,16383,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,16383,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,16383,0.05563200016816457
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,16383,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,16383,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,16383,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,16383,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,16383,0.0529120018084844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,16383,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,32767,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,32767,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,32767,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,32767,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,32767,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,32767,0.07850133379300435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,32767,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,32767,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,32767,0.09660266836484273
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,32767,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,32767,0.07680533329645793
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,32767,0.07611200213432312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,32767,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,32767,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,65535,0.13005333145459494
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,65535,0.16264533003171286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,65535,0.1262933313846588
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,65535,0.12492799758911133
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,65535,0.12800000111262003
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,65535,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,65535,0.1256053348382314
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,65535,0.16571733355522156
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,65535,0.13294399778048197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,65535,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,65535,0.12868266304334006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,65535,0.12834133704503378
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,65535,0.12834133704503378
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,65535,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,1,131071,0.23859200874964395
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,2,131071,0.2984960079193115
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,4,131071,0.2293706734975179
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,8,131071,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,16,131071,0.22766933838526407
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,32,131071,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,64,131071,0.22869332631429037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,1,131071,0.2450773318608602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,2,131071,0.30019734303156537
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,4,131071,0.23449599742889404
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,8,131071,0.22118399540583292
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,16,131071,0.2198186715443929
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,32,131071,0.22459733486175537
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,64,131071,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,1,0.014773332824309668
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,1,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,1,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,1,0.01360000049074491
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,1,0.013973332941532135
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,1,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,1,0.01392000044385592
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,1,0.014890667051076889
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,1,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,1,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,1,0.014773332824309668
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,1,0.01403733342885971
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,1,0.014165333161751429
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,3,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,3,0.013189333180586496
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,3,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,3,0.01643199970324834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,3,0.014346666634082794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,3,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,3,0.014554666976133982
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,3,0.015077333897352219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,3,0.013072000195582708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,3,0.013834666460752487
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,3,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,3,0.01368533323208491
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,3,0.014032000054915747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,3,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,7,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,7,0.015824000040690105
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,7,0.013178666432698568
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,7,0.013674666484196981
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,7,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,7,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,7,0.012800000607967377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,7,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,7,0.014581333845853806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,7,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,7,0.013957332819700241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,7,0.013936000565687815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,7,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,7,0.013978666315476099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,15,0.01471466695268949
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,15,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,15,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,15,0.014991999914248785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,15,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,15,0.01369599997997284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,15,0.014314666390419006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,15,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,15,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,15,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,15,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,15,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,15,0.013951999445756277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,15,0.012831999609867731
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,31,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,31,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,31,0.013781332721312841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,31,0.013770667215188345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,31,0.013482666263977686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,31,0.01509333277742068
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,31,0.014192000031471252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,16,65535,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,31,0.013546666751305262
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,31,0.015504000087579092
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,31,0.01379199946920077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,31,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,63,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,63,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,63,0.014490666488806406
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,63,0.013770667215188345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,63,0.013482666263977686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,63,0.0144213338692983
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,63,0.012357333054145178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,63,0.014394666999578476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,63,0.014794666320085526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,63,0.01320533330241839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,63,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,63,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,63,0.013130666067202887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,127,0.014933332800865173
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,127,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,127,0.01303999995191892
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,127,0.014597332725922266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,127,0.014533333480358124
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,127,0.01441066712141037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,127,0.014303999642531076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,127,0.014767999450365702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,127,0.013754667093356451
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,127,0.012453333785136541
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,127,0.014864000181357065
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,127,0.01676799977819125
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,127,0.014864000181357065
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,127,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,255,0.016586666305859882
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,255,0.014389333625634512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,255,0.014250667144854864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,255,0.013797332843144735
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,255,0.014293332894643148
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,255,0.014170666535695394
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,255,0.01402666668097178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,255,0.016517333686351776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,255,0.014229333649079004
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,255,0.013904000322024027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,255,0.012549333274364471
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,255,0.013471999516089758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,255,0.01314666618903478
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,511,0.014970666418472925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,511,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,511,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,511,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,511,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,511,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,511,0.014271999398867289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,511,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,511,0.014842666685581207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,511,0.01552533358335495
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,511,0.01441066712141037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,511,0.014378666877746582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,511,0.014416000495354334
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,511,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,1023,0.02314666658639908
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,1023,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,1023,0.018426666657129925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,1023,0.016127999871969223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,1023,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,1023,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,1023,0.02311466634273529
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,1023,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,1023,0.01701333373785019
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,1023,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,1023,0.015594666202863058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,1023,0.015077333897352219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,1023,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,2047,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,2047,0.02309866746266683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,2047,0.01869333287080129
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,2047,0.018618666877349217
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,2047,0.01970133309563001
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,2047,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,2047,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,2047,0.023775999744733173
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,2047,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,2047,0.018863999595244724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,2047,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,2047,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,2047,0.018522666146357853
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,4095,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,4095,0.03173333406448364
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,4095,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,4095,0.02651199946800868
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,4095,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,4095,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,4095,0.025087999800841015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,4095,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,4095,0.03378133227427801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,4095,0.026943999032179516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,4095,0.02526933451493581
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,4095,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,4095,0.025493333737055462
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,4095,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,8191,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,8191,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,8191,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,8191,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,8191,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,8191,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,8191,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,8191,0.05904533465703329
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,8191,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,8191,0.048800001541773476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,8191,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,8191,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,8191,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,16383,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,16383,0.0962559978167216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,16383,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,16383,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,16383,0.07235733171304067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,16383,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,16383,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,16383,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,16383,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,16383,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,16383,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,16383,0.0744053324063619
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,16383,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,32767,0.13329600294431052
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,16383,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,32767,0.17423999309539795
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,32767,0.12492799758911133
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,32767,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,32767,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,32767,0.12152000268300374
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,32767,0.12288000186284383
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,32767,0.13363200426101685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,32767,0.17306133111317953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,32767,0.12219732999801636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,32767,0.12458667159080505
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,32767,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,32767,0.11980799833933513
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,32767,0.12185600399971008
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,65535,0.23381332556406656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,65535,0.3247893253962199
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,65535,0.22220800320307413
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,65535,0.21708800395329794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,65535,0.2187946637471517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,65535,0.2218773365020752
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,65535,0.21435733636220297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,65535,0.23347200949986777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,65535,0.3237599929173787
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,65535,0.2198186715443929
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,65535,0.21708800395329794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,65535,0.22050132354100546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,65535,0.21401600042978922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,65535,0.2177706758181254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,1,131071,0.4288853406906128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,2,131071,0.6299253304799398
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,4,131071,0.41095999876658124
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,8,131071,0.40857601165771484
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,16,131071,0.4092586835225423
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,32,131071,0.4082346757253011
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,64,131071,0.4061973492304484
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,1,131071,0.43195732434590656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,2,131071,0.628053347269694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,4,131071,0.4116479953130086
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,8,131071,0.4072106679280599
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,16,131071,0.40926400820414227
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,131071,0.4089173475901286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,1,0.015504000087579092
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,64,131071,0.4078986644744873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,1,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,1,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,1,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,1,0.013199999928474426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,1,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,1,0.013898666948080063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,1,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,1,0.014831999937693277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,1,0.014405333747466406
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,1,0.014581333845853806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,1,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,1,0.01313599944114685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,1,0.01451733335852623
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,3,0.014938666174809137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,3,0.014874666929244995
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,3,0.01440000037352244
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,3,0.013082666943470636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,3,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,3,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,3,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,3,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,3,0.0129120002190272
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,3,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,3,0.014394666999578476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,3,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,3,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,3,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,7,0.014965333044528961
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,7,0.014901333798964819
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,7,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,7,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,7,0.014581333845853806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,7,0.013199999928474426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,7,0.0129120002190272
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,7,0.016789333273967106
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,7,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,7,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,7,0.01440000037352244
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,7,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,7,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,7,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,15,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,15,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,15,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,15,0.014373333503802618
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,15,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,15,0.015119999647140503
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,15,0.0145066666106383
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,15,0.01791999985774358
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,15,0.013823999712864557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,15,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,15,0.014394666999578476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,15,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,31,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,15,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,31,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,31,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,31,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,31,0.01441066712141037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,31,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,31,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,31,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,31,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,31,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,31,0.014394666999578476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,31,0.013077333569526672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,31,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,63,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,63,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,63,0.014720000326633453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,63,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,63,0.013258667041858038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,63,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,63,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,63,0.015087999403476715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,63,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,63,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,63,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,63,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,63,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,63,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,127,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,127,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,127,0.014544000228246054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,127,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,127,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,127,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,127,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,127,0.015082667271296183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,127,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,127,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,127,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,127,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,127,0.013258667041858038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,255,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,255,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,255,0.013077333569526672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,255,0.014394666999578476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,255,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,255,0.013823999712864557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,255,0.014389333625634512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,255,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,255,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,255,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,255,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,255,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,255,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,255,0.013823999712864557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,511,0.020746666938066483
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,511,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,511,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,511,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,511,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,511,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,511,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,511,0.021055998901526134
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,511,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,511,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,511,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,511,0.014794666320085526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,511,0.015189333508412043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,511,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,1023,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,1023,0.023103999594847362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,1023,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,1023,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,1023,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,1023,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,1023,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,1023,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,1023,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,1023,0.016783999900023144
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,1023,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,1023,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,1023,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,2047,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,2047,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,2047,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,2047,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,2047,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,2047,0.02290133386850357
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,2047,0.03448000053564707
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,2047,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,2047,0.02090666691462199
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,2047,0.02219199885924657
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,4095,0.05770133435726166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,4095,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,4095,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,4095,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,4095,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,4095,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,4095,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,4095,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,4095,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,4095,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,4095,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,4095,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,4095,0.04333333174387614
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,4095,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,8191,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,8191,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,8191,0.07201600074768066
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,8191,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,8191,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,8191,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,8191,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,8191,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,8191,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,8191,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,8191,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,8191,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,8191,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,8191,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,16383,0.13566933075586954
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,16383,0.17322667439778647
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,16383,0.12220799922943115
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,16383,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,16383,0.12014399965604146
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,16383,0.1181013286113739
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,16383,0.13755733768145242
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,16383,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,16383,0.1723733345667521
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,16383,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,16383,0.12015466888745625
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,16383,0.11946666240692139
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,16383,0.11878400047620137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,16383,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,32767,0.23995733261108398
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,32767,0.32477333148320514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,32767,0.22357332706451416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,32767,0.21776533126831055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,32767,0.21913599967956543
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,32767,0.21640533208847046
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,32767,0.21776533126831055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,32767,0.2409813404083252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,32767,0.32443734010060626
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,32767,0.22425599892934164
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,32767,0.22050132354100546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,32767,0.22153067588806152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,32767,0.2177706758181254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,32767,0.2198186715443929
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,65535,0.4452693462371826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,65535,0.6312959988911947
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,65535,0.4264906644821167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,65535,0.41915734608968097
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,65535,0.41779200236002606
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,65535,0.4215466578801473
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,65535,0.4452693462371826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,65535,0.6294240156809489
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,65535,0.41915734608968097
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,65535,0.4189759890238444
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,65535,0.4261546532313029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,65535,0.4194986820220947
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,65535,0.41710933049519855
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,65535,0.4154026508331299
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,1,131071,0.8536746501922607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,2,131071,1.2373332977294922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,4,131071,0.8238080342610677
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,8,131071,0.814250628153483
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,16,131071,0.8128853638966879
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,32,131071,0.8108373483022054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,64,131071,0.8111786842346191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,1,131071,0.8523093064626058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,2,131071,1.2380159695943196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,4,131071,0.8200533390045166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,8,131071,0.8101546764373779
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,16,131071,0.8091306686401367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,32,131071,0.8111733595530192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,1,0.014602666099866232
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,64,131071,0.814250628153483
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,1,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,1,0.014896000425020853
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,32,8191,0.04538666705290476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,1,0.017029333859682083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,1,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,1,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,1,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,1,0.016095999628305435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,1,0.014117332796255747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,1,0.014746667196353277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,1,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,1,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,1,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,1,0.01441066712141037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,3,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,3,0.016538667182127636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,3,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,3,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,3,0.014885333677132925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,3,0.016048000504573185
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,3,0.014442666123310724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,3,0.01691199963291486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,3,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,3,0.01479999969402949
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,3,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,3,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,3,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,3,0.01441066712141037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,7,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,7,0.01682666689157486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,7,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,7,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,7,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,7,0.014581333845853806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,7,0.014581333845853806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,7,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,7,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,7,0.016837333639462788
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,7,0.01588800052801768
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,7,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,7,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,7,0.014848000059525171
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,15,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,15,0.01526933287580808
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,15,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,15,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,15,0.014602666099866232
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,15,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,15,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,15,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,15,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,15,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,15,0.014917333920796713
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,15,0.014096000542243322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,15,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,15,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,31,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,31,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,31,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,31,0.01440000037352244
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,31,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,31,0.0141546664138635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,31,0.01443733274936676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,31,0.016976000120242436
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,31,0.014970666418472925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,31,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,31,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,31,0.014592000593741735
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,31,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,31,0.01481066644191742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,63,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,63,0.016832000265518825
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,63,0.015082667271296183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,63,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,63,0.014394666999578476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,63,0.014271999398867289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,63,0.014581333845853806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,63,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,63,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,63,0.015119999647140503
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,63,0.014837333311637243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,63,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,63,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,63,0.014592000593741735
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,127,0.014730667074521383
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,127,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,127,0.014970666418472925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,127,0.014794666320085526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,127,0.014453332871198654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,127,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,127,0.015072000523408255
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,127,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,127,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,127,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,127,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,127,0.013082666943470636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,127,0.014767999450365702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,255,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,127,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,255,0.016229332735141117
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,255,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,255,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,255,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,255,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,255,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,255,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,255,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,255,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,255,0.013999999811251959
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,511,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,511,0.017018667111794155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,511,0.016842667013406754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,511,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,511,0.01647466669480006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,511,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,511,0.03377600014209747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,511,0.020762667059898376
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,511,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,511,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,511,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,511,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,1023,0.040965333580970764
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,1023,0.034128000338872276
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,1023,0.023141334454218548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,1023,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,1023,0.020746666938066483
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,1023,0.021040000021457672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,1023,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,1023,0.03926933308442434
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,1023,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,1023,0.020986666282018025
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,2047,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,2047,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,2047,0.04846400022506714
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,2047,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,2047,0.043696001172065735
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,2047,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,2047,0.0631573349237442
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,2047,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,2047,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,2047,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,2047,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,4095,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,4095,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,4095,0.06964266796906789
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,4095,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,4095,0.06861333549022675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,4095,0.0675786683956782
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,4095,0.06791999936103821
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,4095,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,4095,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,4095,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,4095,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,4095,0.06826133529345195
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,4095,0.06849599877993266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,4095,0.07133866846561432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,8191,0.14267733693122864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,8191,0.1814133326212565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,8191,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,8191,0.1204906702041626
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,8191,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,8191,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,8191,0.11878400047620137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,8191,0.14114133516947427
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,8191,0.18312533696492514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,8191,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,8191,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,8191,0.11981333295504253
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,8191,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,8191,0.12083199620246887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,16383,0.24472532669703165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,16383,0.34355199337005615
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,16383,0.22698666652043661
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,16383,0.22153067588806152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,16383,0.21913067499796549
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,16383,0.2218666672706604
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,16383,0.21947733561197916
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,16383,0.2461013396581014
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,16383,0.22459733486175537
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,16383,0.34388800462086994
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,16383,0.2198186715443929
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,16383,0.22118399540583292
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,16383,0.2211893399556478
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,16383,0.21845332781473795
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,32767,0.4500480095545451
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,32767,0.6679893334706625
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,32767,0.42717333634694415
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,32767,0.41677331924438477
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,32767,0.41710933049519855
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,32767,0.418997327486674
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,32767,0.41813333829243976
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,32767,0.44731732209523517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,32767,0.6666293144226074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,32767,0.4247893492380778
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,32767,0.4188266595204671
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,32767,0.4164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,32767,0.42052265008290607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,32767,0.4184746742248535
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,65535,0.8577706813812256
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,65535,1.3134400049845378
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,65535,0.8231253623962402
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,65535,0.812544027964274
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,65535,0.8108373483022054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,65535,0.8115200201670328
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,65535,0.8135680357615153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,65535,0.8628906408945719
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,65535,1.3096960385640461
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,65535,0.8241439660390218
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,65535,0.8122026920318604
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,65535,0.8156159718831381
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,65535,0.8139093716939291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,65535,0.8141813278198242
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,1,131071,1.6744052569071453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,2,131071,2.598741372426351
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,4,131071,1.6204800605773926
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,8,131071,1.6010239919026692
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,16,131071,1.598965326944987
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,32,131071,1.6071680386861165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,64,131071,1.595568021138509
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,1,131071,1.6802132924397786
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,2,131071,2.5966933568318686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,4,131071,1.612287998199463
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,8,131071,1.5965867042541504
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,131071,1.594538688659668
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,131071,1.6075092951456706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,1,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,64,131071,1.6006827354431152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,1,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,1,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,1,0.01685333376129468
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,1,0.014853333433469137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,1,0.015290666371583939
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,1,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,1,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,1,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,1,0.016149333367745083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,1,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,1,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,3,0.016837333639462788
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,3,0.022805333137512207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,3,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,3,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,3,0.015082667271296183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,3,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,3,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,3,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,3,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,3,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,3,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,3,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,3,0.015125333021084467
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,3,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,7,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,7,0.0234400009115537
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,7,0.016837333639462788
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,7,0.01509333277742068
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,7,0.015637333194414776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,7,0.014773332824309668
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,7,0.015119999647140503
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,7,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,7,0.022810667753219604
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,7,0.016602666427691776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,7,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,7,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,7,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,7,0.015125333021084467
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,15,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,15,0.022789334257443745
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,15,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,15,0.015696000307798386
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,15,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,15,0.015103999525308609
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,15,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,15,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,15,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,15,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,15,0.016842667013406754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,15,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,15,0.014858666807413101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,15,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,32,255,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,31,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,31,0.023557332654794056
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,31,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,31,0.01684800038735072
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,31,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,31,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,31,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,31,0.01868266612291336
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,31,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,31,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,31,0.015082667271296183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,31,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,31,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,63,0.01729600007335345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,16,511,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,63,0.023141334454218548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,63,0.016976000120242436
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,63,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,63,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,63,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,63,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,63,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,63,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,63,0.02311466634273529
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,63,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,63,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,63,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,63,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,127,0.016842667013406754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,127,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,127,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,127,0.01658133293191592
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,127,0.01569066693385442
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,127,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,127,0.015184000134468079
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,127,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,127,0.023045333723227184
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,127,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,127,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,127,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,127,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,255,0.019039999693632126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,255,0.02310933421055476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,255,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,255,0.014938666174809137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,255,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,255,0.01525866612792015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,255,0.014842666685581207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,255,0.01599466676513354
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,255,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,255,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,511,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,511,0.02514133354028066
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,511,0.023152001202106476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,511,0.01989866668979327
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,511,0.019066666563351948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,511,0.02073066681623459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,511,0.0200853335360686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,511,0.02515200028816859
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,511,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,511,0.019733333339293797
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,511,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,511,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,511,0.020714666694402695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,1023,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,1023,0.0634986658891042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,1023,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,1023,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,1023,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,1023,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,1023,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,1023,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,1023,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,1023,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,1023,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,1023,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,1023,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,1023,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,2047,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,2047,0.10410666465759277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,2047,0.07576533158620198
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,2047,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,2047,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,2047,0.07099199791749318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,2047,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,2047,0.105813334385554
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,2047,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,2047,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,2047,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,2047,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,2047,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,2047,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,4095,0.13039466738700867
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,4095,0.1868799924850464
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,4095,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,4095,0.12593600153923035
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,4095,0.12494400143623352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,4095,0.12492266297340393
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,4095,0.1276639997959137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,4095,0.13004266222318014
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,4095,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,4095,0.18602667252222696
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,4095,0.1262933313846588
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,4095,0.1269813378651937
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,4095,0.12593066692352295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,4095,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,8191,0.2392746607462565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,8191,0.3466240167617798
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,8191,0.23313599824905396
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,8191,0.2344906727472941
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,8191,0.23244800170262656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,8191,0.23040000597635904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,8191,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,8191,0.34833598136901855
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,8191,0.2379093368848165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,8191,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,8191,0.23244800170262656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,8191,0.23143466313680014
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,8191,0.22869332631429037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,8191,0.23176000515619913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,16383,0.44185598691304523
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,16383,0.6751573085784912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,16383,0.44492801030476886
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,16383,0.438101331392924
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,16383,0.43674667676289874
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,16383,0.4374186595280965
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,16383,0.4391253391901652
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,16383,0.4428853193918864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,16383,0.6703786849975586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,16383,0.44355201721191406
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,16383,0.4384426673253377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,16383,0.43674135208129883
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,16383,0.43673598766326904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,16383,0.43775999546051025
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,32767,0.861525297164917
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,32767,1.32369065284729
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,32767,0.8567413489023844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,32767,0.853333314259847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,32767,0.851967970530192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,32767,0.8550453186035156
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,32767,0.849232037862142
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,32767,0.866645336151123
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,32767,1.325055996576945
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,32767,0.8567466735839844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,32767,0.8540159861246744
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,32767,0.8523093064626058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,32767,0.853333314259847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,32767,0.8536746501922607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,65535,1.6895999908447266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,65535,2.6460159619649253
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,65535,1.6776533126831055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,65535,1.680389404296875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,65535,1.6789973576863606
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,65535,1.6744106610616047
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,65535,1.6737279891967773
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,65535,1.6855039596557617
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,65535,2.716671943664551
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,65535,1.6817493438720703
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,65535,1.6808959643046062
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,65535,1.6827680269877117
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,65535,1.6749226252237956
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,65535,1.6761172612508137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,1,131071,3.3402932484944663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,2,131071,5.339653650919597
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,4,131071,3.33243719736735
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,8,131071,3.3355093002319336
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,16,131071,3.330394744873047
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,32,131071,3.335850715637207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,64,131071,3.327317237854004
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,1,131071,3.341994603474935
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,2,131071,5.348864237467448
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,8,131071,3.3344853719075522
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,4,131071,3.346101442972819
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,16,131071,3.3235626220703125
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,1,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,1,0.03482133398453394
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,1,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,1,0.021840001145998638
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,32,131071,3.3259572982788086
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,64,131071,3.3389228185017905
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,1,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,1,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,1,0.02309333284695943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,1,0.022181332111358643
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,1,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,1,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,1,0.02276800076166789
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,3,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,3,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,3,0.02276800076166789
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,3,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,3,0.02207999924818675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,3,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,3,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,3,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,3,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,3,0.02109866589307785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,3,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,7,0.02313599983851115
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,7,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,7,0.023546665906906128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,7,0.02110933264096578
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,7,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,7,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,7,0.02109866589307785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,7,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,7,0.03481066723664602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,7,0.02314666658639908
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,7,0.021082667013009388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,7,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,7,0.021104000508785248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,7,0.02219199885924657
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,15,0.024570666253566742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,15,0.03618666778008143
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,15,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,15,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,15,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,15,0.022416000564893086
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,15,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,15,0.023141334454218548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,15,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,15,0.021087999145189922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,15,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,31,0.023141334454218548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,31,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,31,0.0230880007147789
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,31,0.02109866589307785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,31,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,31,0.021733333667119343
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,31,0.02088533341884613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,31,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,31,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,31,0.021375998854637146
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,31,0.0216799999276797
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,31,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,63,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,63,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,63,0.020917333662509918
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,63,0.023578666150569916
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,63,0.021061333517233532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,63,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,63,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,63,0.0230880007147789
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,63,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,63,0.02242133269707362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,63,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,127,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,127,0.02314666658639908
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,127,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,127,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,127,0.020746666938066483
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,127,0.022096000611782074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,127,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,127,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,127,0.023103999594847362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,127,0.021781332790851593
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,127,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,127,0.02139200021823247
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,127,0.02144533395767212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,255,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,255,0.03856533269087473
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,255,0.02314666658639908
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,255,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,255,0.02107733239730199
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,255,0.022128000855445862
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,255,0.02183466653029124
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,255,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,255,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,255,0.038586666186650596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,255,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,255,0.02109866589307785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,255,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,511,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,511,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,511,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,511,0.046767999728520714
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,511,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,511,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,511,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,511,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,511,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,511,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,511,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,511,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,511,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,511,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,1023,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,1023,0.1088693340619405
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,1023,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,1023,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,1023,0.07372266550858815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,1023,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,1023,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,1023,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,1023,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,1023,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,1023,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,1023,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,1023,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,1023,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,2047,0.1890986760457357
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,2047,0.13704533378283182
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,2047,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,2047,0.12800000111262003
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,2047,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,2047,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,2047,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,2047,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,2047,0.1909760038057963
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,2047,0.13140267133712769
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,2047,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,2047,0.12663466731707254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,2047,0.1262933313846588
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,2047,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,4095,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,4095,0.3534453312555949
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,4095,0.23961599667867026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,4095,0.23517866929372153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,4095,0.23449599742889404
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,4095,0.2362026572227478
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,4095,0.22971733411153158
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,4095,0.24644800027211508
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,4095,0.24165334304173788
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,4095,0.3561813433965047
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,4095,0.23756800095240274
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,4095,0.23484265804290771
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,4095,0.2344906727472941
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,4095,0.23244800170262656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,8191,0.6734506289164225
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,8191,0.460970679918925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,8191,0.45346665382385254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,8191,0.447653333346049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,8191,0.4551680088043213
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,8191,0.44834665457407635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,8191,0.45074133078257245
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,8191,0.45653335253397626
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,8191,0.45073068141937256
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,8191,0.67412797609965
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,8191,0.44868266582489014
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,8191,0.44731732209523517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,8191,0.4514133135477702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,8191,0.44389867782592773
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,16383,0.8871253331502279
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,16383,1.3226666450500488
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,16383,0.869376023610433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,16383,0.8690346876780192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,16383,0.8738133112589518
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,16383,0.8741546471913656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,16383,0.874837319056193
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,16383,0.8772266705830892
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,16383,1.3233493169148762
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,16383,0.8727893034617106
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,16383,0.8659626642862955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,16383,0.8656160036722819
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,16383,0.8710827032725016
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,16383,0.8738133112589518
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,32767,1.7273173332214355
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,32767,2.654208024342855
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,32767,1.7256107330322266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,32767,1.7105919520060222
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,32767,1.7136640548706055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,32767,1.711941401163737
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,32767,1.718784014383952
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,32767,1.7317652702331543
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,32767,2.7122348149617515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,32767,1.720138708750407
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,32767,1.7174186706542969
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,32767,1.7181013425191243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,32767,1.7177599271138508
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,32767,1.7095680236816406
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,1,65535,3.442351977030436
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,2,65535,5.397850672403972
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,4,65535,3.395413398742676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,8,65535,3.391999880472819
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,65535,3.411285400390625
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,32,65535,3.4208319981892905
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,64,65535,3.3981494903564453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,16,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,1,65535,3.4331305821736655
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,2,65535,5.429930369059245
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,4,65535,3.397461255391439
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,8,65535,3.3923412958780923
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,16,65535,3.399850527445475
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,32,65535,3.417423884073893
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,1,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,1,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,1,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,1,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,1,0.032074667513370514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,1,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,1,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,1,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,1,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,1,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,1,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,64,65535,3.411285400390625
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,1,0.03173866619666418
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,1,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,1,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,3,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,3,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,3,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,3,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,3,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,3,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,3,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,3,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,3,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,3,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,3,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,3,0.03141333411137263
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,3,0.03276266654332479
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,3,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,7,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,7,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,7,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,7,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,7,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,7,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,7,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,7,0.03827200084924698
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,7,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,7,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,7,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,7,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,7,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,7,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,15,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,15,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,15,0.0365280012289683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,15,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,15,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,15,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,15,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,15,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,15,0.03583466758330663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,15,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,15,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,15,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,15,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,15,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,31,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,31,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,31,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,31,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,31,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,31,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,31,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,31,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,31,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,31,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,31,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,31,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,31,0.032431999842325844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,31,0.03141333411137263
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,63,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,63,0.0365226666132609
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,63,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,63,0.058703998724619545
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,63,0.032069332897663116
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,63,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,63,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,63,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,63,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,63,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,63,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,63,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,63,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,63,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,127,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,127,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,127,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,127,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,127,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,127,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,127,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,127,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,127,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,127,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,127,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,127,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,127,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,127,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,255,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,255,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,255,0.05120533208052317
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,255,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,255,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,255,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,255,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,255,0.06111466884613037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,255,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,255,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,255,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,255,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,255,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,255,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,511,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,511,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,511,0.10921066999435425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,511,0.07681066791216533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,511,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,511,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,511,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,511,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,511,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,511,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,511,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,511,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,511,0.07918400069077809
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,511,0.07782933115959167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,1023,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,1023,0.18107734123865762
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,1023,0.1367039978504181
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,1023,0.13396799564361572
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,1023,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,1023,0.13157866398493448
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,1023,0.1307360033194224
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,1023,0.1462613344192505
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,1023,0.18175999323527017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,1023,0.14062933127085367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,1023,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,1023,0.13294933239618936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,1023,0.12800000111262003
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,1023,0.13038399815559387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,2047,0.2583893338839213
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,2047,0.3295573393503825
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,2047,0.24235200881958008
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,2047,0.24440000454584757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,2047,0.23756800095240274
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,2047,0.23688532908757529
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,2047,0.24302933613459268
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,2047,0.25463465849558514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,2047,0.3298986752827962
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,2047,0.2467893362045288
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,2047,0.24132267634073892
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,2047,0.2392746607462565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,2047,0.2379093368848165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,2047,0.23652799924214682
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,4095,0.6206133365631104
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,4095,0.4797439972559611
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,4095,0.46610132853190106
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,4095,0.45960533618927
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,4095,0.45585068066914874
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,4095,0.4530400037765503
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,4095,0.45585068066914874
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,4095,0.47701334953308105
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,4095,0.6178133487701416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,4095,0.4667733510335286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,4095,0.45789865652720135
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,4095,0.45550398031870526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,4095,0.4514133135477702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,4095,0.4538026650746663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,8191,0.9113600254058838
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,8191,1.18886399269104
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,8191,0.9004373550415039
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,8191,0.8966879844665527
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,8191,0.8939519723256429
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,8191,0.8881493409474691
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,8191,0.8857599894205729
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,8191,0.9076053301493326
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,8191,1.1912533442179363
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,8191,0.8878080050150553
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,8191,0.8963413238525391
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,8191,0.8925866285959879
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,8191,0.8925866285959879
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,8191,0.8932693004608154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,16383,1.7832959493001301
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,16383,2.3956425984700522
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,16383,1.757354736328125
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,16383,1.7686293919881184
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,16383,1.7532587051391602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,16383,1.7522346178690593
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,16383,1.750864028930664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,16383,1.7905173301696777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,16383,2.394965330759684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,16383,1.7628107070922852
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,16383,1.7505280176798503
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,16383,1.7535999615987141
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,16383,1.7477919260660808
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,16383,1.745408058166504
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,1,32767,3.5432106653849282
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,2,32767,4.926122665405273
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,4,32767,3.4955946604410806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,8,32767,3.485354741414388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,16,32767,3.4911572138468423
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,32,32767,3.48091729482015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,64,32767,3.47545591990153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,1,32767,3.528362592061361
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,2,32767,5.029546737670898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,4,32767,3.490330696105957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,8,32767,3.4884214401245117
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,16,32767,3.4826294581095376
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,1,0.06279466549555461
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,1,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,1,0.11195733149846394
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,1,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,1,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,1,0.05050666630268097
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,1,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,32,32767,3.486037254333496
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,64,32767,3.470677375793457
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,1,0.11229866743087769
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,1,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,1,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,1,0.05119466781616211
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,1,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,1,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,1,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,3,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,3,0.11229866743087769
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,3,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,3,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,3,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,3,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,3,0.050848002235094704
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,3,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,3,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,3,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,3,0.11400000254313152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,3,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,3,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,3,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,7,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,7,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,7,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,7,0.11229866743087769
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,7,0.05016533533732096
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,7,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,7,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,7,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,7,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,7,0.05256533126036326
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,7,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,7,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,7,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,7,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,15,0.06280000011126201
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,15,0.11161599556605022
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,15,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,15,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,15,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,15,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,15,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,15,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,15,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,15,0.11331199606259663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,15,0.05221866567929586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,15,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,15,0.05017066498597463
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,15,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,31,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,31,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,31,0.05121066669623057
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,31,0.11332799990971883
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,31,0.05016533533732096
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,31,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,31,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,31,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,31,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,31,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,31,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,31,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,31,0.05017066498597463
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,31,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,63,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,63,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,63,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,63,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,63,0.050527999798456825
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,63,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,63,0.050527999798456825
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,63,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,63,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,63,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,63,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,63,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,63,0.05119466781616211
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,63,0.05119466781616211
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,127,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,127,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,127,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,127,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,127,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,127,0.060080001751581825
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,127,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,127,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,127,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,127,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,127,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,127,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,127,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,127,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,255,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,255,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,255,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,255,0.1204906702041626
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,255,0.07065600156784058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,255,0.06962666908899943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,255,0.06962666908899943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,255,0.10103999574979146
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,255,0.11878400047620137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,255,0.0726986676454544
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,255,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,255,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,255,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,255,0.07133333384990692
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,511,0.1508693297704061
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,511,0.19746132691701254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,511,0.13823999961217245
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,511,0.13038399815559387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,511,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,511,0.1293760041395823
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,511,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,511,0.14079999923706055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,511,0.19985065857569376
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,511,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,511,0.1307360033194224
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,511,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,511,0.12596266468365988
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,1023,0.3415040175120036
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,1023,0.25224532683690387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,1023,0.23995733261108398
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,1023,0.2300586700439453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,1023,0.22426132361094156
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,1023,0.22562134265899658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,1023,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,1023,0.34252798557281494
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,1023,0.2515679995218913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,1023,0.23995733261108398
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,1023,0.22664533058802286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,1023,0.22698666652043661
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,1023,0.22526933749516806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,1023,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,2047,0.6388053496678671
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,2047,0.4411733150482178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,2047,0.42854400475819904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,2047,0.43025068442026776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,2047,0.42854400475819904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,2047,0.4312746524810791
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,2047,0.6398293177286783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,2047,0.4602880080540975
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,2047,0.44014934698740643
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,2047,0.4275199969609578
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,2047,0.42854400475819904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,2047,0.4264959891637166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,2047,0.43025068442026776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,4095,0.8686933517456055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,4095,1.217194636662801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,4095,0.8471893469492594
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,4095,0.8350773652394613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,4095,0.8333653608957926
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,4095,0.838485320409139
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,4095,0.8381439844767252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,4095,0.8755146662394205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,4095,1.2117280165354412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,4095,0.8509439627329508
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,4095,0.8403626283009847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,4095,0.8354132970174154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,4095,0.839680035909017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,4095,0.8357493082682291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,8191,1.6802132924397786
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,8191,2.3582773208618164
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,8191,1.6580266952514648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,8191,1.6453973452250164
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,8191,1.6542720794677734
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,8191,1.6358399391174316
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,8191,1.64027738571167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,8191,1.6841386159261067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,8191,2.3565600713094077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,8191,1.6481280326843262
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,8191,1.6399359703063965
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,8191,1.6430080731709797
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,8191,1.6457386016845703
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,8191,1.636522610982259
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,511,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,16383,3.3109334309895835
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,2,16383,4.831743876139323
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,4,16383,3.276970545450846
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,1,2047,0.4609653155008952
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,8,16383,3.265023867289225
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,16,16383,3.23908265431722
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,32,16383,3.25
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,64,16383,3.2650187810262046
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,1,16383,3.2914826075236
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,2,16383,4.825781186421712
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,4,16383,3.272533416748047
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,8,16383,3.2718559900919595
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,1,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,1,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,1,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,1,0.01119999960064888
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,1,0.012319999436537424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,1,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,1,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,16,16383,3.2612692515055337
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,1,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,32,16383,3.2520532608032227
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,1,0.012389333297808966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,1,0.01303999995191892
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,1,0.01320533330241839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,1,0.011968000481526056
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,1,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,1,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,3,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,3,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,3,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,3,0.013210666676362356
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,3,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,3,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,3,0.01293333371480306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,3,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,3,0.014032000054915747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,3,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,64,16383,3.2629760106404624
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,3,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,3,0.01201066623131434
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,3,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,3,0.01328533391157786
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,7,0.013861333330472311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,7,0.01250133290886879
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,7,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,7,0.014533333480358124
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,7,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,7,0.012367999802033106
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,7,0.012149333953857422
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,7,0.013477332890033722
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,7,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,7,0.014362666755914688
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,7,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,7,0.012346666306257248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,7,0.01313599944114685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,7,0.01239466667175293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,15,0.012725333372751871
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,15,0.013301332791646322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,15,0.015125333021084467
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,15,0.013056000073750814
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,15,0.011434666812419891
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,15,0.013482666263977686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,15,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,15,0.013637332866589228
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,15,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,15,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,15,0.014090667168299357
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,15,0.01179733375708262
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,15,0.01219733307758967
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,15,0.01301866645614306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,31,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,31,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,31,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,31,0.011034666250149408
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,31,0.012346666306257248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,31,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,31,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,31,0.013823999712864557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,31,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,31,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,31,0.011952000359694162
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,63,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,31,0.013877333452304205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,63,0.013487999637921652
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,63,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,63,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,63,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,63,0.012341332932313284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,63,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,63,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,63,0.014058666924635569
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,63,0.01240533341964086
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,63,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,63,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,63,0.01381333296497663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,63,0.013749333719412485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,127,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,127,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,127,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,127,0.013749333719412485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,127,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,127,0.012954667210578918
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,127,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,127,0.01258133351802826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,127,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,127,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,127,0.011594666788975397
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,127,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,127,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,255,0.012725333372751871
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,255,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,255,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,255,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,255,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,255,0.012186666329701742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,255,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,255,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,255,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,255,0.01303999995191892
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,255,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,255,0.012357333054145178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,511,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,511,0.0161013330022494
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,511,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,511,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,511,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,511,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,511,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,511,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,511,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,511,0.016602666427691776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,511,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,511,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,511,0.01463466634353002
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,511,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,1023,0.016634666671355564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,1023,0.016602666427691776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,1023,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,1023,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,1023,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,1023,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,1023,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,1023,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,1023,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,1023,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,1023,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,1023,0.014629332969586054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,1023,0.01525866612792015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,2047,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,2047,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,2047,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,2047,0.015642666568358738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,2047,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,2047,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,2047,0.015573333948850632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,2047,0.017680000513792038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,2047,0.018960000326236088
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,2047,0.016943999876578648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,2047,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,2047,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,2047,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,2047,0.01524266724785169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,4095,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,4095,0.021055998901526134
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,4095,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,4095,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,4095,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,4095,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,4095,0.01693333312869072
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,8191,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,8191,0.021498667697111767
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,8191,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,8191,0.028581333657105763
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,8191,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,8191,0.021055998901526134
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,8191,0.02072000006834666
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,16383,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,16383,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,16383,0.028325334191322327
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,16383,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,16383,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,16383,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,32767,0.03957866628964742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,32767,0.04026666780312856
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,32767,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,32767,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,32767,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,32767,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,32767,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,32767,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,32767,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,32767,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,65535,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,65535,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,65535,0.048122664292653404
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,65535,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,65535,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,65535,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,65535,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,65535,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,65535,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,65535,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,65535,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,65535,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,65535,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,65535,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,131071,0.0631520003080368
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,131071,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,131071,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,131071,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,131071,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,131071,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,131071,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,131071,0.06280000011126201
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,131071,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,131071,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,131071,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,131071,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,131071,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,131071,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,1,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,1,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,1,0.013013333082199097
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,1,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,1,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,1,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,1,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,1,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,1,0.012842666357755661
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,1,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,1,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,1,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,1,0.012346666306257248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,3,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,3,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,3,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,3,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,3,0.01413333291808764
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,3,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,3,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,3,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,3,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,3,0.012917333592971167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,3,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,3,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,3,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,3,0.013056000073750814
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,7,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,7,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,7,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,7,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,7,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,7,0.013786666095256805
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,7,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,7,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,7,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,7,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,7,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,7,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,7,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,7,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,15,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,15,0.01293333371480306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,15,0.012847999731699625
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,15,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,15,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,15,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,15,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,15,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,15,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,15,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,15,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,15,0.013866666704416275
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,15,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,15,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,31,0.01258133351802826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,31,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,31,0.012752000242471695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,31,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,31,0.012506666282812754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,31,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,31,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,31,0.012671999633312225
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,31,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,63,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,63,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,63,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,63,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,63,0.012671999633312225
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,63,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,63,0.013962666193644205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,63,0.01293333371480306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,63,0.012800000607967377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,63,0.01321600005030632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,63,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,63,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,63,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,63,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,127,0.012586666891972223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,127,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,127,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,127,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,127,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,127,0.012538666526476542
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,127,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,127,0.013210666676362356
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,127,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,127,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,127,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,127,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,127,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,127,0.012400000045696894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,255,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,255,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,255,0.012549333274364471
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,255,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,255,0.011535999675591787
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,255,0.01451733335852623
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,255,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,255,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,255,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,255,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,255,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,255,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,511,0.014453332871198654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,511,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,511,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,511,0.01239466667175293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,511,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,511,0.013210666676362356
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,511,0.014778666198253632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,511,0.014602666099866232
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,511,0.014581333845853806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,511,0.012901333471139273
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,511,0.013546666751305262
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,511,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,511,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,1023,0.016602666427691776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,1023,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,1023,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,1023,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,1023,0.012917333592971167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,1023,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,1023,0.013376000026861826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,1023,0.014767999450365702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,1023,0.01452800010641416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,1023,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,1023,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,1023,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,1023,0.013386666774749756
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,1023,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,2047,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,2047,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,2047,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,2047,0.0129120002190272
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,2047,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,2047,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,2047,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,2047,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,2047,0.014432000617186228
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,2047,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,4095,0.02526933451493581
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,2047,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,4095,0.020469332734743755
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,4095,0.019071999937295914
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,4095,0.01889066646496455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,4095,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,4095,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,4095,0.025610665480295818
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,4095,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,4095,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,4095,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,4095,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,4095,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,8191,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,8191,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,8191,0.024117333193620045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,8191,0.021850667893886566
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,8191,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,8191,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,8191,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,8191,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,8191,0.02242133269707362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,8191,0.02178666740655899
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,16383,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,16383,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,16383,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,16383,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,16383,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,16383,0.04266133407751719
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,16383,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,16383,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,16383,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,16383,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,16383,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,16383,0.04199466605981191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,16383,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,16383,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,32767,0.04404266675313314
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,32767,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,32767,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,32767,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,32767,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,32767,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,32767,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,32767,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,32767,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,32767,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,65535,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,32767,0.05256533126036326
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,65535,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,65535,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,65535,0.061434666315714516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,65535,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,65535,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,65535,0.0798773318529129
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,65535,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,65535,0.06418133278687795
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,65535,0.05459733307361603
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,65535,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,65535,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,65535,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,65535,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,131071,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,131071,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,131071,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,131071,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,131071,0.0986400047938029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,131071,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,131071,0.10376532872517903
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,131071,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,131071,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,131071,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,131071,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,131071,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,131071,0.10410666465759277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,1,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,1,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,1,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,1,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,1,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,1,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,1,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,1,0.013877333452304205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,1,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,1,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,1,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,1,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,1,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,1,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,3,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,3,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,3,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,3,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,3,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,3,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,3,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,3,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,3,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,3,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,3,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,3,0.012346666306257248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,7,0.013936000565687815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,3,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,7,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,7,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,7,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,7,0.014303999642531076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,7,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,7,0.012357333054145178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,7,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,7,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,7,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,7,0.01258133351802826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,7,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,7,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,15,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,15,0.01301866645614306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,15,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,15,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,15,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,15,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,15,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,15,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,15,0.01320533330241839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,15,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,15,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,15,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,15,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,31,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,15,0.012357333054145178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,31,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,31,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,31,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,31,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,31,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,31,0.012506666282812754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,31,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,31,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,31,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,31,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,31,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,31,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,31,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,63,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,63,0.01320533330241839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,63,0.014720000326633453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,63,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,63,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,63,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,63,0.014501333236694336
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,63,0.01481066644191742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,63,0.013178666432698568
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,63,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,63,0.012362666428089142
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,63,0.012479999413092932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,63,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,63,0.013679999858140945
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,127,0.014767999450365702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,127,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,127,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,127,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,127,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,127,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,127,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,127,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,127,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,127,0.014981333166360855
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,127,0.012357333054145178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,127,0.012810666114091873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,127,0.014202666779359182
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,127,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,255,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,255,0.013546666751305262
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,255,0.012597333639860153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,255,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,255,0.014271999398867289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,255,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,255,0.014629332969586054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,255,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,255,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,255,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,511,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,511,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,511,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,511,0.013397333522637686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,511,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,511,0.013536000003417334
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,511,0.013210666676362356
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,511,0.014975999792416891
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,511,0.014805333067973455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,511,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,511,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,511,0.013397333522637686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,511,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,1023,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,1023,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,1023,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,1023,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,1023,0.013722666849692663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,1023,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,1023,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,1023,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,1023,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,1023,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,1023,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,1023,0.01303999995191892
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,1023,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,1023,0.01320533330241839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,2047,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,2047,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,2047,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,2047,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,2047,0.01647466669480006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,2047,0.023904000719388325
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,2047,0.01899733394384384
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,2047,0.016634666671355564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,2047,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,2047,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,2047,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,4095,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,4095,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,511,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,4095,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,4095,0.01860800012946129
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,4095,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,4095,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,4095,0.029701332251230877
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,4095,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,4095,0.020714666694402695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,4095,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,4095,0.01904533306757609
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,4095,0.018863999595244724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,4095,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,8191,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,8191,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,8191,0.024853333830833435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,8191,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,8191,0.03377600014209747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,8191,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,8191,0.030378667016824085
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,16383,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,16383,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,16383,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,16383,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,16383,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,16383,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,16383,0.04062933226426443
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,16383,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,16383,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,32767,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,32767,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,32767,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,32767,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,32767,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,32767,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,32767,0.061093335350354515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,32767,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,32767,0.05426666637261709
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,32767,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,32767,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,32767,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,32767,0.06145066519578298
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,32767,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,65535,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,65535,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,65535,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,65535,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,65535,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,65535,0.08736532926559448
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,65535,0.08704533179601033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,65535,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,65535,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,65535,0.09660800298055013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,65535,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,65535,0.08362666765848796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,65535,0.08567466338475545
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,65535,0.08704533179601033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,131071,0.12662933270136514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,131071,0.16145599881807962
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,131071,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,131071,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,131071,0.13397333025932312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,131071,0.13312000036239624
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,131071,0.12867732842763266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,131071,0.13703466455141702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,131071,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,131071,0.16145066420237222
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,131071,0.13039466738700867
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,131071,0.1341546674569448
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,131071,0.13805866241455078
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,1,0.014917333920796713
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,131071,0.1358560025691986
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,1,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,1,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,1,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,1,0.013482666263977686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,1,0.013951999445756277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,1,0.014778666198253632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,1,0.014394666999578476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,1,0.012831999609867731
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,1,0.013631999492645264
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,1,0.012586666891972223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,1,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,1,0.013749333719412485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,3,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,3,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,3,0.01440000037352244
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,3,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,3,0.014058666924635569
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,3,0.01332266628742218
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,3,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,3,0.015072000523408255
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,3,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,3,0.014202666779359182
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,3,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,3,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,3,0.014096000542243322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,3,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,7,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,7,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,7,0.012506666282812754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,7,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,7,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,131071,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,7,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,7,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,7,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,7,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,7,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,7,0.013925333817799887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,7,0.013754667093356451
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,7,0.013530666629473368
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,7,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,15,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,15,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,15,0.013530666629473368
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,15,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,15,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,15,0.013573333621025085
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,15,0.012453333785136541
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,15,0.014773332824309668
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,15,0.01240533341964086
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,15,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,15,0.013738666971524557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,15,0.014096000542243322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,15,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,15,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,31,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,31,0.0129120002190272
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,31,0.012549333274364471
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,31,0.013141332815090815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,31,0.013823999712864557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,31,0.012400000045696894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,31,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,31,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,31,0.014298666268587112
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,31,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,31,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,31,0.014442666123310724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,63,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,63,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,63,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,63,0.01392000044385592
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,63,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,63,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,63,0.013477332890033722
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,63,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,63,0.0129120002190272
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,63,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,63,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,63,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,63,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,63,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,127,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,127,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,127,0.013850666582584381
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,127,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,127,0.012400000045696894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,127,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,127,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,127,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,127,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,127,0.0129120002190272
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,127,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,127,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,127,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,127,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,255,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,255,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,255,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,255,0.013712000101804733
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,255,0.013365333278973898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,255,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,255,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,255,0.014592000593741735
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,255,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,255,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,255,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,255,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,255,0.013722666849692663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,255,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,511,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,511,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,511,0.014767999450365702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,511,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,511,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,511,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,511,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,511,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,511,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,511,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,511,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,511,0.012901333471139273
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,511,0.012725333372751871
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,511,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,1023,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,1023,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,1023,0.014858666807413101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,1023,0.015082667271296183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,1023,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,1023,0.014629332969586054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,1023,0.02310933421055476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,1023,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,1023,0.015087999403476715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,1023,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,1023,0.014901333798964819
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,1023,0.01637866720557213
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,1023,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,2047,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,2047,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,2047,0.02481066683928172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,2047,0.016906666258970898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,2047,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,2047,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,2047,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,2047,0.029685333371162415
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,2047,0.023103999594847362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,2047,0.019050666441520054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,2047,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,2047,0.016970666746298473
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,2047,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,2047,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,4095,0.03139200061559677
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,4095,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,4095,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,4095,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,4095,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,4095,0.022976001103719074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,4095,0.03299733251333237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,4095,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,4095,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,4095,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,4095,0.022805333137512207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,8191,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,8191,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,8191,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,8191,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,8191,0.036858665446440377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,8191,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,8191,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,16383,0.05699733396371206
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,16383,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,16383,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,16383,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,16383,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,16383,0.05324266850948334
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,16383,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,16383,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,16383,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,16383,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,16383,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,16383,0.05256533126036326
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,16383,0.05188799897829691
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,16383,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,32767,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,32767,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,32767,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,32767,0.07714133461316426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,32767,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,32767,0.07714133461316426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,32767,0.07884266475836436
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,32767,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,32767,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,32767,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,32767,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,32767,0.07610666751861572
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,32767,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,32767,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,65535,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,65535,0.1336373289426168
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,65535,0.16401066382726034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,65535,0.1262933313846588
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,65535,0.12834133704503378
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,65535,0.1269813378651937
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,65535,0.12663466731707254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,65535,0.13038399815559387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,65535,0.16401066382726034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,65535,0.13260799646377563
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,65535,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,65535,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,65535,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,65535,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,131071,0.23278399308522543
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,131071,0.2320906718571981
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,131071,0.2988426685333252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,131071,0.23107733329137167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,131071,0.22016000747680664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,131071,0.22562134265899658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,131071,0.2307413419087728
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,131071,0.23040000597635904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,131071,0.23483733336130777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,131071,0.3022506634394328
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,131071,0.22528000672658285
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,131071,0.22220800320307413
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,131071,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,131071,0.22357332706451416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,1,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,1,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,1,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,1,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,1,0.014064000298579534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,1,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,1,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,1,0.014853333433469137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,1,0.012453333785136541
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,1,0.0129120002190272
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,1,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,1,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,3,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,3,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,3,0.014629332969586054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,3,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,3,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,3,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,3,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,3,0.014837333311637243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,3,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,3,0.013072000195582708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,3,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,3,0.012549333274364471
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,3,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,3,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,7,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,7,0.01422400027513504
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,7,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,7,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,7,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,7,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,7,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,7,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,7,0.013077333569526672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,7,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,7,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,7,0.012319999436537424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,7,0.015930666277805965
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,15,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,15,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,15,0.013418667018413544
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,15,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,15,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,15,0.014111999422311783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,15,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,15,0.016282666474580765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,15,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,15,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,15,0.0129120002190272
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,15,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,15,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,15,0.01320533330241839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,31,0.015967999895413715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,31,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,31,0.014101333916187286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,31,0.01302933320403099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,31,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,31,0.01303999995191892
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,31,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,31,0.016271999726692837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,31,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,31,0.013552000125249227
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,31,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,31,0.012576000144084295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,31,0.01314666618903478
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,63,0.01597333326935768
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,63,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,63,0.014853333433469137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,63,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,63,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,63,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,63,0.014933332800865173
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,63,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,63,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,63,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,63,0.012800000607967377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,63,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,63,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,127,0.016842667013406754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,127,0.014848000059525171
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,127,0.012805332740147909
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,127,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,127,0.012560000022252401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,127,0.012437333663304647
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,127,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,127,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,127,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,127,0.012800000607967377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,127,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,127,0.012800000607967377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,127,0.012549333274364471
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,127,0.012810666114091873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,255,0.015295999745527903
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,255,0.014842666685581207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,255,0.012805332740147909
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,255,0.012901333471139273
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,255,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,255,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,255,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,255,0.015098666151364645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,255,0.014848000059525171
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,255,0.012896000097195307
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,255,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,255,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,255,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,255,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,511,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,511,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,511,0.015477333217859268
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,511,0.014842666685581207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,511,0.013237333546082178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,511,0.012794667234023413
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,511,0.013258667041858038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,511,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,511,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,511,0.014848000059525171
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,511,0.013258667041858038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,511,0.014853333433469137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,511,0.012815999488035837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,1023,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,1023,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,1023,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,1023,0.015178666760524115
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,1023,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,1023,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,1023,0.0145066666106383
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,1023,0.028330666323502857
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,1023,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,1023,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,1023,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,1023,0.015130666395028433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,1023,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,2047,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,2047,0.029701332251230877
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,2047,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,2047,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,2047,0.0189280000825723
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,2047,0.018858666221300762
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,2047,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,2047,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,2047,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,2047,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,2047,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,4095,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,4095,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,4095,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,4095,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,4095,0.02661866694688797
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,4095,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,4095,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,4095,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,4095,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,4095,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,4095,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,4095,0.02661866694688797
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,4095,0.024853333830833435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,4095,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,8191,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,8191,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,8191,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,8191,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,8191,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,8191,0.04709866642951965
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,8191,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,8191,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,8191,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,8191,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,8191,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,8191,0.04881600042184194
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,8191,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,8191,0.04640533526738485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,16383,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,16383,0.08260799944400787
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,16383,0.09796800216039021
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,16383,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,16383,0.07268799841403961
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,16383,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,16383,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,16383,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,16383,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,16383,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,16383,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,16383,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,16383,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,16383,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,32767,0.13943466544151306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,32767,0.13328533371289572
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,32767,0.1728853384653727
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,32767,0.1256053348382314
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,32767,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,32767,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,32767,0.12289599577585857
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,32767,0.1353386640548706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,32767,0.1384160021940867
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,32767,0.17305066188176474
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,32767,0.12492799758911133
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,32767,0.12184533476829529
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,32767,0.12219732999801636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,32767,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,1,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,65535,0.2474666635195414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,65535,0.23449599742889404
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,65535,0.32307199637095135
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,65535,0.21744000911712646
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,65535,0.2208426594734192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,65535,0.21708800395329794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,65535,0.21640533208847046
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,65535,0.24712532758712769
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,65535,0.23244800170262656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,65535,0.3227306604385376
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,65535,0.2187946637471517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,65535,0.22152533133824667
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,65535,0.21538132429122925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,65535,0.21742399533589682
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,131071,0.4613120158513387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,131071,0.4322986602783203
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,131071,0.6336853504180908
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,131071,0.4106239875157674
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,131071,0.41369601090749103
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,131071,0.40755200386047363
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,131071,0.4092586835225423
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,131071,0.46028268337249756
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,131071,0.4288853406906128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,131071,0.6302826801935831
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,131071,0.4102773269017537
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,131071,0.40960534413655597
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,131071,0.40960001945495605
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,1,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,131071,0.4044853448867798
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,1,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,1,0.01293333371480306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,1,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,1,0.012901333471139273
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,1,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,1,0.015306666493415833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,1,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,1,0.013594667116800943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,1,0.014730667074521383
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,1,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,1,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,3,0.016469333320856094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,3,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,3,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,3,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,3,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,3,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,3,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,3,0.014943999548753103
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,3,0.014384000251690546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,3,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,3,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,3,0.013082666943470636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,3,0.014629332969586054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,7,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,7,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,7,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,7,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,7,0.013482666263977686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,7,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,7,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,7,0.01525866612792015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,7,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,7,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,7,0.014842666685581207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,7,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,7,0.013989333063364029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,7,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,15,0.016634666671355564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,15,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,63,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,15,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,15,0.014490666488806406
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,15,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,15,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,15,0.014266667266686758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,15,0.015082667271296183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,15,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,15,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,15,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,15,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,15,0.012906666845083237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,15,0.014426667243242264
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,31,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,31,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,31,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,31,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,31,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,31,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,31,0.018266666680574417
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,31,0.015125333021084467
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,31,0.01458666721979777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,31,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,31,0.016575999557971954
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,31,0.016122666498025257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,31,0.014522666732470194
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,31,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,63,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,63,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,63,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,63,0.013023999830087027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,63,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,63,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,63,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,63,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,63,0.015978666643301647
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,63,0.014981333166360855
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,63,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,63,0.012896000097195307
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,63,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,63,0.015263999501864115
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,127,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,127,0.01471466695268949
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,127,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,127,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,127,0.014581333845853806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,127,0.013290667285521826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,127,0.01463466634353002
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,127,0.014442666123310724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,127,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,127,0.01826133330663045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,127,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,127,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,127,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,127,0.013072000195582708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,255,0.016634666671355564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,255,0.014757333944241205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,255,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,255,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,255,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,255,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,255,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,255,0.01479999969402949
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,255,0.014405333747466406
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,255,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,255,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,255,0.014826666563749313
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,511,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,511,0.021087999145189922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,511,0.017157333592573803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,511,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,511,0.01479999969402949
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,511,0.015530666957298914
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,511,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,511,0.016832000265518825
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,511,0.031727999448776245
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,511,0.015599999576807022
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,511,0.014917333920796713
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,511,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,511,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,1023,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,1023,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,1023,0.018320000420014065
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,1023,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,1023,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,1023,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,1023,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,1023,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,1023,0.02311466634273529
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,1023,0.018522666146357853
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,1023,0.01692266638080279
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,1023,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,1023,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,2047,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,2047,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,2047,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,2047,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,2047,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,2047,0.022266666094462078
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,2047,0.021104000508785248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,2047,0.04163199911514918
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,2047,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,2047,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,2047,0.023775999744733173
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,2047,0.021253332495689392
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,2047,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,2047,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,4095,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,4095,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,4095,0.058703998724619545
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,4095,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,4095,0.04403733213742574
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,4095,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,4095,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,4095,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,4095,0.06205866734186808
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,4095,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,4095,0.04710933566093445
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,4095,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,4095,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,4095,0.04404266675313314
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,8191,0.08937066793441772
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,8191,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,8191,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,8191,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,8191,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,8191,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,8191,0.06826133529345195
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,8191,0.08873599767684937
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,8191,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,8191,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,8191,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,8191,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,8191,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,8191,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,16383,0.14250666896502176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,16383,0.13772799571355185
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,16383,0.17220266660054526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,16383,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,16383,0.11980799833933513
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,16383,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,16383,0.11979732910792033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,16383,0.143696000178655
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,16383,0.13806399703025818
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,16383,0.12458667159080505
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,16383,0.17407999436060587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,16383,0.11844799915949504
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,16383,0.1181013286113739
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,16383,0.11946666240692139
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,32767,0.25122666358947754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,32767,0.24234133958816528
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,32767,0.3258026639620463
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,32767,0.22732800245285034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,32767,0.21811733643213907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,32767,0.2187946637471517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,32767,0.21982399622599283
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,32767,0.2505386670430501
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,32767,0.24064532915751138
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,32767,0.32819199562072754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,32767,0.2259626587231954
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,32767,0.22016000747680664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,32767,0.21913599967956543
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,32767,0.21913599967956543
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,65535,0.4667786757151286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,65535,0.44970667362213135
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,65535,0.6297599871953329
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,65535,0.4220586617787679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,65535,0.41710933049519855
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,65535,0.41573333740234375
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,65535,0.41881601015726727
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,65535,0.4657440185546875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,65535,0.4442453384399414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,65535,0.6283946832021078
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,65535,0.4254719813664754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,65535,0.4198400179545085
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,65535,0.41778135299682617
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,65535,0.41881601015726727
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,131071,0.854698657989502
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,131071,0.8936106363932291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,131071,1.2363093694051106
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,131071,0.822106679280599
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,131071,0.8139093716939291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,131071,0.8128853638966879
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,131071,0.8180053234100342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,131071,0.8959999879201254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,131071,0.8567466735839844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,131071,1.237674633661906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,131071,0.8197120030721029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,131071,0.8063999811808268
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,131071,0.814250628153483
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,1,0.014965333044528961
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,131071,0.8111786842346191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,1,0.01595199977358182
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,1,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,1,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,1,0.014778666198253632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,1,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,1,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,1,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,1,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,1,0.016634666671355564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,1,0.014767999450365702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,1,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,1,0.014645333091417948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,1,0.01440000037352244
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,3,0.016682667036851246
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,3,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,3,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,3,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,3,0.014965333044528961
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,3,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,3,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,3,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,3,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,3,0.013936000565687815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,7,0.016629333297411602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,7,0.016458666572968166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,7,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,7,0.015253332753976187
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,7,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,7,0.01441066712141037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,7,0.014629332969586054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,7,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,7,0.014773332824309668
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,7,0.016506666938463848
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,7,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,7,0.014794666320085526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,15,0.014767999450365702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,15,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,15,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,15,0.014720000326633453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,15,0.01462399959564209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,1,0.014511999984582266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,15,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,15,0.014581333845853806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,15,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,15,0.014778666198253632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,15,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,15,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,15,0.014607999473810196
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,15,0.014554666976133982
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,31,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,31,0.014901333798964819
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,31,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,31,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,31,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,31,0.014090667168299357
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,31,0.01616000011563301
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,31,0.015429332852363586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,31,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,31,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,31,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,31,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,63,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,63,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,63,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,63,0.015087999403476715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,63,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,63,0.016229332735141117
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,63,0.014389333625634512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,63,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,63,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,63,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,63,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,63,0.014778666198253632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,63,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,63,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,127,0.015119999647140503
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,127,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,127,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,127,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,127,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,127,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,127,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,127,0.014837333311637243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,127,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,127,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,127,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,127,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,127,0.014773332824309668
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,127,0.014474666366974512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,255,0.01701333373785019
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,255,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,255,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,255,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,255,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,255,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,255,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,255,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,255,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,255,0.016682667036851246
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,255,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,255,0.01618133361140887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,255,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,255,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,511,0.01904533306757609
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,511,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,511,0.021407999098300934
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,511,0.01945066700379054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,511,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,511,0.015189333508412043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,511,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,511,0.018858666221300762
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,511,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,511,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,511,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,511,0.01682666689157486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,511,0.014837333311637243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,511,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,1023,0.02587733417749405
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,1023,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,1023,0.02348800003528595
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,1023,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,1023,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,1023,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,1023,0.02628266563018163
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,1023,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,1023,0.021386665602525074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,1023,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,1023,0.023141334454218548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,2047,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,2047,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,2047,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,2047,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,2047,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,2047,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,2047,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,2047,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,2047,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,2047,0.04743466774622599
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,2047,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,2047,0.042319998145103455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,4095,0.0740586668252945
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,4095,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,4095,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,4095,0.07408000032107036
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,4095,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,4095,0.07065066695213318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,4095,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,4095,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,4095,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,4095,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,4095,0.06929600238800049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,4095,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,4095,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,8191,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,8191,0.1283519963423411
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,8191,0.18345600366592407
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,8191,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,8191,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,8191,0.11946666240692139
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,8191,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,8191,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,8191,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,8191,0.18278400103251138
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,8191,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,8191,0.1204906702041626
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,8191,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,8191,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,16383,0.23552000522613525
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,16383,0.24337599674860635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,16383,0.3449173370997111
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,16383,0.2239146629969279
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,16383,0.21878933906555176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,16383,0.22050132354100546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,16383,0.21947733561197916
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,16383,0.23449599742889404
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,16383,0.2450773318608602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,16383,0.3428693215052287
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,16383,0.22766933838526407
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,16383,0.2208426594734192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,16383,0.21981332699457803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,16383,0.21913599967956543
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,32767,0.44970667362213135
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,32767,0.44595734278361004
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,32767,0.6714026927947998
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,32767,0.4241066773732503
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,32767,0.41831998030344647
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,32767,0.41787731647491455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,32767,0.4154026508331299
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,32767,0.45073068141937256
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,32767,0.44629331429799396
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,32767,0.6679893334706625
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,32767,0.4247893492380778
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,32767,0.4164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,32767,0.41881601015726727
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,32767,0.4198400179545085
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,65535,0.8765493233998617
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,65535,0.866645336151123
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,65535,1.3124319712320964
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,65535,0.8200533390045166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,65535,0.8152746359507242
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,65535,0.8091306686401367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,65535,0.8159573078155518
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,65535,0.874837319056193
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,65535,0.8584533532460531
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,65535,1.3119146823883057
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,65535,0.8251732985178629
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,65535,0.8152799606323242
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,65535,0.8118613560994467
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,65535,0.8128800392150879
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,131071,1.7320960362752278
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,131071,1.6837973594665527
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,131071,2.5994240442911782
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,131071,1.615023930867513
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,131071,1.5965867042541504
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,131071,1.602453390757243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,131071,1.5955626169840496
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,131071,1.732437292734782
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,131071,1.6686132748921711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,131071,2.5949920018514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,131071,1.608191967010498
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,131071,1.597610632578532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,131071,1.5942026774088542
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,131071,1.5993173917134602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,1,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,1,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,1,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,1,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,1,0.015317333241303762
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,1,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,1,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,1,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,1,0.022810667753219604
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,1,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,1,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,7,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,1,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,1,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,3,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,3,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,3,0.023056000471115112
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,3,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,3,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,3,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,3,0.021333334346612293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,3,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,3,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,3,0.015125333021084467
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,3,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,3,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,7,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,7,0.01699200024207433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,7,0.02309866746266683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,7,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,7,0.016437333077192307
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,7,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,7,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,7,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,7,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,7,0.0242399995525678
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,7,0.016890666137139004
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,7,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,7,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,7,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,15,0.020746666938066483
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,15,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,15,0.022810667753219604
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,15,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,15,0.015125333021084467
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,15,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,15,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,15,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,15,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,15,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,15,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,15,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,31,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,31,0.023898666103680927
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,31,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,31,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,31,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,31,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,31,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,31,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,31,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,63,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,63,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,63,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,63,0.016906666258970898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,63,0.014965333044528961
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,63,0.01552533358335495
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,63,0.0206986665725708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,63,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,63,0.02309333284695943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,63,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,63,0.014848000059525171
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,63,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,63,0.015290666371583939
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,127,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,127,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,127,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,127,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,127,0.014959999670584997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,127,0.014842666685581207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,4095,0.06963733335336049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,127,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,127,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,127,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,127,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,127,0.01599466676513354
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,127,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,127,0.015290666371583939
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,255,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,255,0.01905599981546402
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,255,0.02309333284695943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,255,0.016890666137139004
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,255,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,255,0.014831999937693277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,255,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,255,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,255,0.01933866615096728
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,255,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,255,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,255,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,255,0.014917333920796713
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,255,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,511,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,511,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,511,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,511,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,511,0.019733333339293797
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,511,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,511,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,511,0.02626666675011317
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,511,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,511,0.023786666492621105
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,511,0.018933333456516266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,511,0.020373333245515823
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,511,0.018944000204404194
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,1023,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,1023,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,1023,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,1023,0.0481333335240682
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,1023,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,1023,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,1023,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,1023,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,1023,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,1023,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,1023,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,1023,0.04538666705290476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,1023,0.04404266675313314
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,1023,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,2047,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,2047,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,2047,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,2047,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,2047,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,2047,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,2047,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,2047,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,2047,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,2047,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,2047,0.10342400272687276
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,2047,0.07235733171304067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,2047,0.07099199791749318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,2047,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,4095,0.1346560021241506
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,4095,0.13226667046546936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,4095,0.18687466780344644
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,4095,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,4095,0.12868266304334006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,4095,0.12492799758911133
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,4095,0.12492799758911133
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,4095,0.1353386640548706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,4095,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,4095,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,4095,0.1858560045560201
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,4095,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,4095,0.12424533565839131
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,4095,0.12424533565839131
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,8191,0.24200532833735147
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,8191,0.23483733336130777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,8191,0.34833065668741864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,8191,0.23757332563400269
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,8191,0.23448532819747925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,8191,0.23040000597635904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,8191,0.233130673567454
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,8191,0.24371200799942017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,8191,0.23518399397532144
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,8191,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,8191,0.34969600041707355
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,8191,0.23176532983779907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,8191,0.2300586700439453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,8191,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,16383,0.45789865652720135
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,16383,0.4428853193918864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,16383,0.6707253456115723
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,16383,0.43913066387176514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,16383,0.4384426673253377
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,16383,0.4370773235956828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,16383,0.4387893279393514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,16383,0.45653335253397626
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,16383,0.44390400250752765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,16383,0.6703786849975586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,16383,0.442197322845459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,16383,0.4408320188522339
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,16383,0.44049068291982013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,16383,0.44014398256937665
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,32767,0.8857599894205729
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,32767,0.8618666330973307
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,32767,1.3216426372528076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,32767,0.8540213108062744
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,32767,0.8509439627329508
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,32767,0.8519626458485922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,32767,0.8553813298543295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,32767,0.8871200084686279
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,32767,0.8622079690297445
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,3,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,32767,1.3233546415964763
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,32767,0.8536799748738607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,32767,0.851967970530192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,32767,0.8543573220570883
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,32767,0.8495840231577555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,65535,1.7409706115722656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,65535,1.6861866315205891
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,65535,2.6460159619649253
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,65535,1.679360071818034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,65535,1.6747520764668782
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,65535,1.6806987126668294
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,65535,1.6737279891967773
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,65535,1.742677370707194
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,65535,1.683791955312093
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,65535,2.638160069783529
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,65535,1.6757760047912598
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,65535,1.6814079284667969
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,65535,1.681440035502116
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,65535,1.677135944366455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,131071,3.456000010172526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,131071,3.339946746826172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,131071,5.310634613037109
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,131071,3.3334614435831704
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,131071,3.3338025410970054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,63,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,131071,3.3283360799153647
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,131071,3.3194665908813477
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,131071,3.462143898010254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,131071,3.3447252909342446
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,131071,5.336234410603841
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,131071,3.335850715637207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,131071,3.3348426818847656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,131071,3.319808006286621
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,131071,3.3283360799153647
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,1,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,1,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,1,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,1,0.02176533391078313
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,1,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,1,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,1,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,1,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,1,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,1,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,3,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,3,0.03481066723664602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,3,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,3,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,3,0.02219199885924657
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,3,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,3,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,3,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,3,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,3,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,3,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,7,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,7,0.03148799886306127
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,7,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,7,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,7,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,7,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,7,0.022426667312781017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,7,0.03133333226044973
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,7,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,7,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,7,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,7,0.03480533262093862
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,7,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,7,0.02242133269707362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,15,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,15,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,15,0.02309866746266683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,15,0.021055998901526134
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,15,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,15,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,15,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,15,0.023141334454218548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,15,0.021850667893886566
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,15,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,15,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,31,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,31,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,31,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,31,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,31,0.020992000897725422
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,31,0.021104000508785248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,31,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,31,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,31,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,31,0.024853333830833435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,31,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,31,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,31,0.021082667013009388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,63,0.03173333406448364
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,63,0.03584533433119456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,63,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,63,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,63,0.020773333807786305
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,63,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,63,0.022805333137512207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,63,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,63,0.024853333830833435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,63,0.02242133269707362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,63,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,63,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,127,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,127,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,127,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,127,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,127,0.02109866589307785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,127,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,127,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,127,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,127,0.03482133398453394
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,127,0.023152001202106476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,127,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,127,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,255,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,255,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,255,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,255,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,255,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,255,0.022517333428064983
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,255,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,255,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,255,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,255,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,255,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,511,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,511,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,511,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,511,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,511,0.04641599953174591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,511,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,511,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,511,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,511,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,511,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,511,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,511,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,511,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,511,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,1023,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,1023,0.07783466577529907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,1023,0.11059199770291646
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,1023,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,1023,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,1023,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,1023,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,1023,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,1023,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,1023,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,1023,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,1023,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,1023,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,1023,0.07303999861081441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,2047,0.144896000623703
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,2047,0.1372160017490387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,2047,0.19165867567062378
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,2047,0.13124266266822815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,2047,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,2047,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,2047,0.12761066357294717
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,2047,0.1389226714769999
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,2047,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,2047,0.19234132766723633
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,2047,0.13158399860064188
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,2047,0.12719999750455221
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,2047,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,2047,0.12800000111262003
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,4095,0.24644267559051514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,4095,0.3534506559371948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,4095,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,4095,0.23825599749883017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,4095,0.23688532908757529
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,4095,0.23756800095240274
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,4095,0.3541333278020223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,4095,0.25361067056655884
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,4095,0.24166399240493774
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,4095,0.2409706711769104
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,4095,0.23654399315516153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,4095,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,4095,0.23517866929372153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,8191,0.45721598466237384
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,8191,0.46984533468882245
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,8191,0.6748159726460775
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,8191,0.44970667362213135
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,8191,0.44868266582489014
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,8191,0.4500480095545451
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,8191,0.4493600130081177
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,8191,0.4667733510335286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,8191,0.460970679918925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,8191,0.6731093724568685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,8191,0.44970667362213135
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,8191,0.44698135058085126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,8191,0.4466346502304077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,8191,0.4490240017573039
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,16383,0.8980639775594076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,16383,0.8837172985076904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,16383,1.3195947011311848
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,16383,0.8721066315968832
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,16383,0.8758613268534342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,16383,0.8645973205566406
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,16383,0.8741546471913656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,16383,0.9004320303599039
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,16383,0.879957358042399
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,16383,1.3213013013203938
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,16383,0.8785920143127441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,16383,0.8601600329081217
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,16383,0.8704000314076742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,16383,0.8673333326975504
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,32767,1.7604319254557292
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,32767,1.739946683247884
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,32767,2.7315200169881186
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,32767,1.7177599271138508
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,32767,1.7153600056966145
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,32767,1.7232213020324707
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,32767,1.7269760767618816
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,32767,1.7590559323628743
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,32767,1.743018627166748
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,32767,2.6678667068481445
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,32767,1.7129920323689778
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,32767,1.7146933873494465
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,32767,1.7143467267354329
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,32767,1.7143467267354329
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,65535,3.4836479822794595
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,65535,3.423914591471354
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,65535,5.380442937215169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,65535,3.403264045715332
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,65535,3.402581214904785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,65535,3.3960959116617837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,65535,3.397120157877604
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,65535,3.4355198542277017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,65535,3.4850133260091147
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,65535,5.5620269775390625
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,65535,3.4191414515177407
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,65535,3.4128214518229165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,1,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,1,0.03857066730658213
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,65535,3.407146771748861
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,1,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,65535,3.4029226303100586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,1,0.036517334481080375
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,1,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,1,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,1,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,1,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,1,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,1,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,1,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,1,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,1,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,1,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,3,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,3,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,3,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,3,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,3,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,3,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,3,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,3,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,3,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,3,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,3,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,3,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,3,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,3,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,7,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,7,0.05733866492907206
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,7,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,7,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,7,0.03173866619666418
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,7,0.035402665535608925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,7,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,7,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,7,0.05393599967161814
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,7,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,7,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,7,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,7,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,7,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,15,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,15,0.05528533458709717
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,15,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,15,0.035743998984495796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,15,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,15,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,15,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,15,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,15,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,15,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,15,0.03139200061559677
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,15,0.035829332967599235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,15,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,15,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,31,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,31,0.05871466795603434
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,31,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,31,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,31,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,31,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,31,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,31,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,31,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,31,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,31,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,31,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,31,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,31,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,63,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,63,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,63,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,63,0.03722133239110311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,63,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,63,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,63,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,63,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,63,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,63,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,63,0.03584533433119456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,63,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,63,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,63,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,127,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,127,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,4095,0.25190399090449017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,127,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,127,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,127,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,127,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,127,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,127,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,127,0.040287998815377556
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,127,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,127,0.056661332647005715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,127,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,127,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,127,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,255,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,255,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,255,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,255,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,255,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,255,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,255,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,255,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,255,0.05153066913286845
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,255,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,255,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,255,0.04709866642951965
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,255,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,255,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,511,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,511,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,511,0.08533333738644917
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,511,0.09010666608810425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,511,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,511,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,511,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,511,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,511,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,511,0.1097866694132487
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,511,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,511,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,511,0.07782933115959167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,511,0.07714666426181793
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,1023,0.14830933014551798
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,1023,0.145578662554423
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,1023,0.13858133554458618
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,1023,0.18347734212875366
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,1023,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,1023,0.13209066788355509
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,1023,0.13363200426101685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,1023,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,1023,0.18346667289733887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,1023,0.14455466469128928
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,1023,0.13687466581662497
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,1023,0.12902399897575378
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,1023,0.13209600249926248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,1023,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,2047,0.24985599517822266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,2047,0.24780799945195517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,2047,0.33023999134699505
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,2047,0.25804799795150757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,2047,0.24098666508992514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,2047,0.24064000447591147
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,2047,0.2379093368848165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,2047,0.25088000297546387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,2047,0.3298986752827962
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,2047,0.2481493353843689
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,2047,0.25600000222524005
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,2047,0.23756800095240274
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,2047,0.23961599667867026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,2047,0.23859200874964395
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,4095,0.6164480050404867
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,4095,0.4763306776682536
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,4095,0.44766398270924884
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,4095,0.4647253354390462
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,4095,0.4599466721216838
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,4095,0.4531253178914388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,4095,0.4575573205947876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,4095,0.4439093271891276
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,4095,0.46609067916870117
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,4095,0.6205493211746216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,4095,0.48110934098561603
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,4095,0.4575573205947876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,4095,0.4575573205947876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,4095,0.4541440010070801
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,8191,0.8395093282063802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,8191,0.920746644337972
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,8191,0.8956533273061117
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,8191,1.1895466645558674
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,8191,0.8901920318603516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,8191,0.8884906768798828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,8191,0.8850773175557455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,8191,0.8381546338399252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,8191,1.1909226576487224
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,8191,0.9188640117645264
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,8191,0.899072011311849
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,8191,0.8942933082580566
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,8191,0.8901973565419515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,8191,0.8915519714355469
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,16383,1.6150186856587727
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,16383,1.7860266367594402
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,16383,1.760090668996175
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,16383,2.3980372746785483
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,16383,1.7587199211120605
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,16383,1.7669119834899902
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,16383,1.7535999615987141
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,16383,1.6177493731180828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,16383,1.788080056508382
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,16383,1.7529120445251465
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,16383,2.400426705678304
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,16383,1.7447147369384766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,16383,1.7515519460042317
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,16383,1.7491626739501953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,32767,3.183098793029785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,32767,3.5276800791422525
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,32767,4.952746709187825
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,32767,3.4891093571980796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,32767,3.494229316711426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,32767,3.4829654693603516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,32767,3.4658934275309243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,32767,3.1793600718180337
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,32767,3.536213239034017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,32767,4.926122665405273
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,32767,3.4942239125569663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,32767,3.4795519510904946
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,1,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,1,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,1,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,32767,3.483989397684733
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,1,0.06211199859778086
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,1,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,32767,3.4641920725504556
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,1,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,1,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,1,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,1,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,1,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,1,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,1,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,1,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,1,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,3,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,3,0.11195199688275655
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,3,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,3,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,3,0.05086400111516317
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,3,0.04948266843954722
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,3,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,3,0.11365866661071777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,3,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,3,0.0634933312733968
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,3,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,3,0.05051200091838837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,3,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,3,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,7,0.0631520003080368
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,7,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,7,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,7,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,7,0.09147733449935913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,7,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,7,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,7,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,7,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,7,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,7,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,7,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,7,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,15,0.11331199606259663
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,15,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,15,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,15,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,15,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,15,0.050853331883748375
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,15,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,15,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,15,0.06314133107662201
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,15,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,15,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,15,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,15,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,15,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,31,0.11229866743087769
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,31,0.061792001128196716
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,31,0.09148266911506653
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,31,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,31,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,31,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,31,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,31,0.11230400204658508
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,31,0.06302399933338165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,31,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,31,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,31,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,31,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,31,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,63,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,63,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,63,0.08875200152397156
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,63,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,63,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,63,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,63,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,63,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,63,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,63,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,63,0.0634986658891042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,63,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,63,0.05291733145713806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,63,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,127,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,127,0.07133866846561432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,127,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,127,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,127,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,127,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,127,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,127,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,127,0.06790933509667714
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,127,0.09738133351008098
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,127,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,127,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,127,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,127,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,255,0.10240532954533894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,255,0.11878400047620137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,255,0.130730668703715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,255,0.08363200227419536
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,255,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,255,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,255,0.07132799923419952
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,255,0.11946666240692139
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,255,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,255,0.12800000111262003
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,255,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,255,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,255,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,255,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,511,0.1981333295504252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,511,0.15104533235232034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,511,0.17493865887324014
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,511,0.1384106675783793
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,511,0.12936000029246011
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,511,0.13294933239618936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,511,0.12902399897575378
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,511,0.1532586713631948
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,511,0.19848533471425375
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,511,0.17390932639439902
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,511,0.14114666978518167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,511,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,511,0.130730668703715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,511,0.13108266393343607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,1023,0.3432106574376424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,1023,0.25143466393152875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,1023,0.2752853234608968
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,1023,0.24234666426976523
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,1023,0.2300586700439453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,1023,0.2242506742477417
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,1023,0.22254933913548788
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,1023,0.2501973311106364
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,1023,0.34355199337005615
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,1023,0.276309331258138
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,1023,0.23961599667867026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,1023,0.22869332631429037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,1023,0.22664533058802286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,1023,0.22152533133824667
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,2047,0.45585068066914874
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,2047,0.6394879817962646
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,2047,0.48179733753204346
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,2047,0.4439093271891276
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,2047,0.4288853406906128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,2047,0.4312800168991089
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,2047,0.4241066773732503
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,2047,0.46062934398651123
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,2047,0.4828159809112549
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,2047,0.6411946614583334
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,2047,0.44492801030476886
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,2047,0.4288853406906128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,2047,0.4288853406906128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,2047,0.429909348487854
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,4095,0.8686933517456055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,4095,1.2161706288655598
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,4095,0.8738133112589518
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,4095,0.8374613126118978
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,4095,0.8302826881408691
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,4095,0.8458240032196045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,4095,0.8306453227996826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,4095,0.8690346876780192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,4095,0.8738133112589518
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,4095,1.211733341217041
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,4095,0.8458240032196045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,4095,0.8461600144704183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,4095,0.8344000180562338
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,4095,0.8323413530985514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,8191,1.6546133359273274
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,8191,1.661786715189616
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,8191,2.353834629058838
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,8191,1.652224063873291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,8191,1.6368692715962727
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,8191,1.6406240463256836
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,7,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,8191,1.6488107045491536
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,8191,1.6563199361165364
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,8191,1.6703200340270996
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,8191,2.3490559260050454
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,8191,1.655296007792155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,8191,1.638912041982015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,8191,1.643349329630534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,8191,1.653231938680013
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,16383,3.217578570048014
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,16383,3.2866827646891275
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,16383,4.798469225565593
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,16383,3.2839733759562173
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,16383,3.250346819559733
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,16383,3.246250788370768
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,16383,3.267754554748535
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,16383,3.2172374725341797
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,16383,3.296597480773926
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,16383,3.2711734771728516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,16383,4.836517333984375
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,16383,3.2520532608032227
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,16383,3.2742398579915366
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,16383,3.255125363667806
