framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,1,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,1,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,1,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,1,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,1,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,1,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,1,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,1,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,1,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,1,0.010341333225369453
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,3,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,3,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,3,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,3,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,3,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,3,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,3,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,3,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,3,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,3,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,7,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,3,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,7,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,7,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,7,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,7,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,7,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,7,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,7,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,7,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,7,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,15,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,15,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,15,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,15,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,15,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,15,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,15,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,15,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,15,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,15,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,15,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,15,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,31,0.20987200736999512
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,31,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,31,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,31,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,31,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,31,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,31,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,31,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,31,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,31,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,31,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,63,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,63,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,63,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,63,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,63,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,63,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,63,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,63,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,63,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,63,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,127,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,127,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,127,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,127,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,127,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,127,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,127,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,127,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,127,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,127,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,127,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,127,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,255,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,127,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,255,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,127,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,255,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,255,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,255,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,255,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,255,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,255,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,255,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,255,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,255,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,255,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,255,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,511,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,511,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,511,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,511,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,511,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,511,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,511,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,511,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,511,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,511,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,511,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,511,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,511,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,511,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,1023,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,511,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,511,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,1023,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,1023,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,1023,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,1023,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,1023,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,1023,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,1023,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,1023,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,1023,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,1023,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,1023,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,1023,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,2047,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,1023,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,1023,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,2047,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,1023,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,2047,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,2047,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,2047,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,2047,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,2047,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,2047,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,2047,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,2047,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,2047,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,2047,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,2047,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,2047,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,2047,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,2047,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,4095,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,4095,0.018229333062966663
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,4095,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,4095,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,4095,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,4095,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,4095,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,4095,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,4095,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,4095,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,4095,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,4095,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,4095,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,4095,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,4095,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,4095,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,8191,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,8191,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,8191,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,8191,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,8191,0.01836799954374631
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,8191,0.019904000063737232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,8191,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,8191,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,8191,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,8191,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,8191,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,16383,0.03852800031503042
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,8191,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,8191,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,8191,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,16383,0.022389332453409832
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,8191,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,8191,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,16383,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,16383,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,16383,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,16383,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,16383,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,16383,0.02014933278163274
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,16383,0.021530665457248688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,16383,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,16383,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,16383,0.020037333170572918
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,16383,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,16383,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,32767,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,16383,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,16383,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,32767,0.03384533276160558
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,32767,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,32767,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,32767,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,32767,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,32767,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,32767,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,32767,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,32767,0.03322133421897888
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,32767,0.027637332677841187
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,32767,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,32767,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,32767,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,32767,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,32767,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,65535,0.034927998979886375
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,65535,0.037802666425704956
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,65535,0.04248000184694926
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,65535,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,65535,0.03533866753180822
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,65535,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,65535,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,65535,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,65535,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,65535,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,65535,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,65535,0.02808533360560735
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,65535,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,65535,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,65535,0.02796799937884013
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,65535,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,131071,0.05810666580994924
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,131071,0.06256533165772755
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,131071,0.0701279987891515
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,131071,0.042021334171295166
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,131071,0.0537013312180837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,131071,0.053717335065205894
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,131071,0.05589866638183594
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,131071,0.05541866521040598
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,128,131071,0.05608533322811127
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,131071,0.04200533529122671
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,131071,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,131071,0.03389866650104523
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,131071,0.03461333364248276
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,131071,0.03392533212900162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,131071,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,128,131071,0.03461333364248276
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,1,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,1,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,1,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,1,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,1,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,1,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,1,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,1,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,1,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,3,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,3,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,3,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,3,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,3,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,3,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,7,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,7,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,7,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,7,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,7,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,7,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,7,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,7,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,7,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,7,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,7,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,15,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,7,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,7,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,15,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,15,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,15,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,15,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,15,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,15,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,15,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,15,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,15,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,15,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,15,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,15,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,15,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,31,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,31,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,31,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,31,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,31,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,31,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,31,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,31,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,63,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,63,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,63,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,63,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,63,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,63,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,63,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,63,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,63,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,63,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,127,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,63,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,127,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,63,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,127,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,127,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,127,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,127,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,127,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,127,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,127,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,127,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,127,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,255,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,127,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,127,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,127,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,255,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,255,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,255,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,255,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,255,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,255,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,255,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,255,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,255,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,511,0.012469333906968435
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,511,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,511,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,511,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,511,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,511,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,511,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,511,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,511,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,511,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,511,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,511,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,511,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,511,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,511,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,511,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,1023,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,1023,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,1023,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,1023,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,1023,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,1023,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,1023,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,1023,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,1023,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,1023,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,1023,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,2047,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,1023,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,2047,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,1023,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,1023,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,2047,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,1023,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,1023,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,2047,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,2047,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,2047,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,2047,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,2047,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,2047,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,2047,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,2047,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,2047,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,2047,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,2047,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,2047,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,2047,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,4095,0.022554665803909302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,4095,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,4095,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,4095,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,4095,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,4095,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,4095,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,4095,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,4095,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,4095,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,4095,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,4095,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,4095,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,4095,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,4095,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,4095,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,8191,0.0303413321574529
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,8191,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,8191,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,8191,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,8191,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,8191,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,8191,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,8191,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,8191,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,8191,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,8191,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,8191,0.020319999506076176
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,8191,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,8191,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,8191,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,8191,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,16383,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,16383,0.03196800003449122
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,16383,0.02565866708755493
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,16383,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,16383,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,16383,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,16383,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,16383,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,16383,0.026181332767009735
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,16383,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,16383,0.02204799900452296
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,16383,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,16383,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,16383,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,16383,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,16383,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,32767,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,32767,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,32767,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,32767,0.036650667587916054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,32767,0.029701332251230877
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,32767,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,32767,0.026127999027570088
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,32767,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,32767,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,32767,0.028688001135985058
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,32767,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,32767,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,65535,0.05952000121275584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,32767,0.02587733417749405
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,32767,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,32767,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,32767,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,65535,0.052245333790779114
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,65535,0.05277866621812185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,65535,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,65535,0.05235200126965841
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,65535,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,65535,0.05495466788609823
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,65535,0.05340800185998281
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,65535,0.05433600147565206
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,65535,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,65535,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,65535,0.034202667574087776
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,65535,0.03091199944416682
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,65535,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,65535,0.029743999242782593
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,65535,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,131071,0.07632000247637431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,131071,0.08679466446240743
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,131071,0.0803306649128596
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,131071,0.09220266342163086
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,131071,0.07798933486143748
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,131071,0.0776800016562144
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,131071,0.07970666885375977
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,128,131071,0.07897066573301952
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,131071,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,131071,0.05754666527112325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,131071,0.05346133311589559
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,131071,0.046853333711624146
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,131071,0.04651199777921041
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,131071,0.048298666874567665
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,131071,0.04771199822425842
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,128,131071,0.04924266537030538
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,1,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,1,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,1,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,1,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,1,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,1,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,1,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,1,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,1,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,1,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,1,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,1,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,1,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,1,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,1,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,3,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,3,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,3,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,3,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,3,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,3,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,3,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,3,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,7,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,7,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,7,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,7,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,7,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,7,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,7,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,7,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,7,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,7,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,7,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,7,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,7,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,15,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,15,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,15,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,15,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,15,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,15,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,15,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,15,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,31,0.011946666985750198
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,15,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,31,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,31,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,31,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,31,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,31,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,31,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,31,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,31,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,31,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,63,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,31,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,31,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,63,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,63,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,63,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,63,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,63,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,63,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,63,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,63,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,127,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,63,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,127,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,127,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,127,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,127,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,127,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,127,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,127,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,127,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,127,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,127,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,127,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,127,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,255,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,255,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,255,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,255,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,255,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,255,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,255,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,255,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,255,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,255,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,255,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,511,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,511,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,511,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,511,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,511,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,511,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,511,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,511,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,511,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,511,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,511,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,1023,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,511,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,511,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,1023,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,1023,0.013568000247081121
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,1023,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,1023,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,1023,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,1023,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,1023,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,1023,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,1023,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,1023,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,1023,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,1023,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,2047,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,1023,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,1023,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,1023,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,2047,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,2047,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,2047,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,2047,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,2047,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,2047,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,2047,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,2047,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,2047,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,2047,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,2047,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,2047,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,2047,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,2047,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,2047,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,4095,0.028197333216667175
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,4095,0.02290133386850357
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,4095,0.017957333475351334
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,4095,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,4095,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,4095,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,4095,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,4095,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,4095,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,4095,0.02060266708334287
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,4095,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,4095,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,4095,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,8191,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,4095,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,4095,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,4095,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,8191,0.023610666394233704
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,8191,0.02979733298222224
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,8191,0.01970133309563001
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,8191,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,8191,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,8191,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,8191,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,8191,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,8191,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,8191,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,8191,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,8191,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,8191,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,8191,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,8191,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,16383,0.030826665461063385
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,16383,0.03312533348798752
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,16383,0.03432533393303553
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,16383,0.026074667771657307
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,16383,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,16383,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,16383,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,16383,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,16383,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,16383,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,16383,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,16383,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,16383,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,32767,0.058037335673967995
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,16383,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,16383,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,16383,0.02146666745344798
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,32767,0.04889066517353058
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,32767,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,32767,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,32767,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,32767,0.04922133187452952
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,32767,0.04752533137798309
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,32767,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,32767,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,32767,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,32767,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,32767,0.033861334125200905
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,32767,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,65535,0.07971199850241344
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,65535,0.09392000238100688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,32767,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,32767,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,32767,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,65535,0.07593066493670146
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,65535,0.07240533332029979
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,65535,0.07176533341407776
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,65535,0.052943999568621315
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,65535,0.07148266832033794
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,65535,0.07152000069618225
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,65535,0.07186666627724965
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,65535,0.050661335388819374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,65535,0.04979733129342397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,131071,0.12377066413561504
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,65535,0.04253333310286204
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,65535,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,65535,0.043290664752324425
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,65535,0.04288533329963684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,65535,0.0430026650428772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,131071,0.134661336739858
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,131071,0.16709866126378378
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,131071,0.12778666615486145
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,131071,0.12402666608492534
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,131071,0.12340799967447917
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,131071,0.12435199817021687
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,128,131071,0.12388799587885539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,131071,0.0743999977906545
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,131071,0.06491200129191081
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,128,131071,0.06461333235104878
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,131071,0.07459733386834462
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,131071,0.08489066362380981
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,131071,0.0662613312403361
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,1,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,131071,0.06419200201829274
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,131071,0.06605866551399231
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,1,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,1,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,1,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,1,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,1,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,1,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,1,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,3,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,1,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,1,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,3,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,3,0.012469333906968435
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,3,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,3,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,3,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,3,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,3,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,3,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,3,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,3,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,3,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,7,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,7,0.011621333658695221
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,7,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,7,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,7,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,7,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,7,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,7,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,7,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,7,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,7,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,7,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,7,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,7,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,15,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,15,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,15,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,15,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,15,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,15,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,15,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,15,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,15,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,31,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,31,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,15,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,15,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,15,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,31,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,31,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,31,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,31,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,31,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,63,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,31,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,31,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,31,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,63,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,63,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,63,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,63,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,63,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,63,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,63,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,63,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,63,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,127,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,127,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,127,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,127,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,127,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,127,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,127,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,127,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,127,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,127,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,255,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,127,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,255,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,127,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,127,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,127,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,255,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,255,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,255,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,255,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,255,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,255,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,511,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,255,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,255,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,511,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,255,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,255,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,255,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,255,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,511,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,511,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,511,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,511,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,511,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,511,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,511,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,511,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,511,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,511,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,511,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,511,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,511,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,511,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,1023,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,1023,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,1023,0.017770666629076004
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,1023,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,1023,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,1023,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,1023,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,1023,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,1023,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,1023,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,1023,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,1023,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,1023,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,1023,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,1023,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,1023,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,2047,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,2047,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,2047,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,2047,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,2047,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,2047,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,2047,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,2047,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,2047,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,2047,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,2047,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,2047,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,2047,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,4095,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,2047,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,2047,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,2047,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,4095,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,4095,0.02773333340883255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,4095,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,4095,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,4095,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,4095,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,4095,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,4095,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,4095,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,4095,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,4095,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,4095,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,4095,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,4095,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,4095,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,8191,0.029002666473388672
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,8191,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,8191,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,8191,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,8191,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,8191,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,8191,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,8191,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,8191,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,8191,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,8191,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,16383,0.05198933184146881
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,8191,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,8191,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,8191,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,16383,0.0455626646677653
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,8191,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,8191,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,16383,0.04855466882387797
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,16383,0.055776000022888184
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,16383,0.047093331813812256
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,16383,0.04562133550643921
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,16383,0.04494399825731913
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,16383,0.04484800000985464
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,16383,0.031871999303499855
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,16383,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,32767,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,16383,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,16383,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,16383,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,16383,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,16383,0.022730665902296703
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,32767,0.0724533349275589
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,32767,0.07037333150704701
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,16383,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,32767,0.08196799953778584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,32767,0.09367466966311137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,32767,0.07451733450094859
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,32767,0.07178133229414622
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,32767,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,32767,0.07148799796899159
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,32767,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,32767,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,32767,0.0510506679614385
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,32767,0.041722665230433144
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,32767,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,32767,0.04063999901215235
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,65535,0.12407466769218445
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,32767,0.04112533231576284
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,65535,0.12336533268292744
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,65535,0.12936000029246011
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,65535,0.1628213326136271
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,65535,0.1269973317782084
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,65535,0.12471999724706014
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,65535,0.1244053343931834
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,65535,0.12410133083661397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,65535,0.07314133147398631
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,65535,0.07209066549936931
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,65535,0.08268799881140391
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,65535,0.06507200002670288
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,65535,0.0642080008983612
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,65535,0.06382933259010315
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,65535,0.06331733365853627
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,131071,0.22156266371409097
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,65535,0.06306666632493337
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,131071,0.22126932938893637
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,131071,0.23760000864664713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,131071,0.30507733424504596
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,131071,0.2290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,131071,0.22180799643198648
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,131071,0.2221119999885559
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,131071,0.10661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,128,131071,0.2223200003306071
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,131071,0.12134933471679688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,131071,0.11757333079973857
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,131071,0.14613866806030273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,131071,0.107013334830602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,131071,0.10658666491508484
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,1,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,131071,0.10585066676139832
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,128,131071,0.10641066233317058
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,1,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,1,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,1,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,1,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,1,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,1,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,1,0.013573333621025085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,1,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,1,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,1,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,1,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,3,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,1,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,1,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,3,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,3,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,1,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,3,0.014495999862750372
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,3,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,3,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,3,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,3,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,3,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,3,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,3,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,3,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,7,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,7,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,7,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,7,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,7,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,7,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,7,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,7,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,7,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,7,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,7,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,15,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,15,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,15,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,15,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,15,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,15,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,15,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,15,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,15,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,15,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,15,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,31,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,31,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,15,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,15,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,31,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,31,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,31,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,31,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,31,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,31,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,31,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,31,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,31,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,31,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,31,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,31,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,63,0.013557333499193192
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,63,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,63,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,63,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,63,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,63,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,63,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,63,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,63,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,63,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,63,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,63,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,63,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,127,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,127,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,127,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,127,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,127,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,127,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,127,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,127,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,127,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,127,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,127,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,127,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,255,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,127,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,255,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,255,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,255,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,255,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,255,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,255,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,255,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,255,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,255,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,255,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,255,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,255,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,255,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,255,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,511,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,511,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,511,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,511,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,511,0.014165333161751429
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,511,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,511,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,511,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,511,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,511,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,511,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,511,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,511,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,511,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,1023,0.01444799949725469
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,511,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,511,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,1023,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,1023,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,1023,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,1023,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,1023,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,1023,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,1023,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,1023,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,1023,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,2047,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,1023,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,1023,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,1023,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,1023,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,1023,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,1023,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,2047,0.026144000391165417
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,2047,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,2047,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,2047,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,2047,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,2047,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,2047,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,2047,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,2047,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,2047,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,2047,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,2047,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,2047,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,4095,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,2047,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,2047,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,4095,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,4095,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,4095,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,4095,0.022346665461858112
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,4095,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,4095,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,4095,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,4095,0.023631999890009563
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,4095,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,4095,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,4095,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,4095,0.01851733277241389
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,4095,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,4095,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,8191,0.04364266494909922
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,8191,0.04979733129342397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,4095,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,8191,0.051301335295041404
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,8191,0.032069332897663116
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,8191,0.05402666827042898
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,8191,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,8191,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,8191,0.04372266431649526
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,8191,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,8191,0.04275199770927429
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,8191,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,16383,0.07587733368078868
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,8191,0.03107733279466629
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,8191,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,8191,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,8191,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,8191,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,16383,0.07597866654396057
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,16383,0.09301867087682088
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,16383,0.04660800099372864
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,16383,0.07158400118350983
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,16383,0.07002666592597961
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,16383,0.04148799926042557
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,16383,0.0710506687561671
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,16383,0.04005866746107737
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,16383,0.06943466762701671
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,16383,0.06894400219122569
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,16383,0.048938666780789696
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,32767,0.13272533814112344
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,16383,0.05190933247407278
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,16383,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,16383,0.039850667119026184
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,32767,0.12398399909337361
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,16383,0.040847999354203544
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,32767,0.13057066996892294
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,32767,0.1713013251622518
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,32767,0.1293653349081675
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,32767,0.12637333075205484
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,32767,0.06294399996598561
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,32767,0.1250933309396108
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,32767,0.061253334085146584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,32767,0.12415466705958049
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,32767,0.0765226682027181
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,32767,0.0705386648575465
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,32767,0.08666132887204488
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,32767,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,32767,0.06151466568311056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,32767,0.06163733204205831
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,65535,0.23463465770085654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,65535,0.22444266080856323
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,65535,0.23846399784088135
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,65535,0.32920000950495404
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,65535,0.23296000560124716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,65535,0.22621333599090576
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,65535,0.22579733530680338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,65535,0.22573866446812949
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,65535,0.12929600477218628
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,65535,0.10502400000890096
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,65535,0.11622933546702068
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,65535,0.15758400162061056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,131071,0.6405119895935059
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,65535,0.10617599884668986
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,65535,0.10512533783912659
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,65535,0.10524800419807434
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,131071,0.4227199951807658
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,65535,0.10507733623186748
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,131071,0.4490186770757039
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,131071,0.23480000098546347
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,131071,0.4418293237686157
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,131071,0.43465598424275714
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,131071,0.4213600158691406
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,131071,0.4230506817499797
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,128,131071,0.4195839961369832
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,131071,0.2084640065828959
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,131071,0.2996693253517151
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,131071,0.19279466072718301
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,131071,0.19022933642069498
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,131071,0.18933866421381632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,131071,0.18996800978978476
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,1,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,128,131071,0.18968000014623007
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,1,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,1,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,1,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,1,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,1,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,1,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,1,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,1,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,1,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,1,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,1,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,3,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,1,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,1,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,3,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,3,0.013424000392357508
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,3,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,3,0.013306666165590286
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,3,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,3,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,3,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,3,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,3,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,3,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,3,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,3,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,3,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,3,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,7,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,7,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,7,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,7,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,7,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,7,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,7,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,7,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,7,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,7,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,7,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,15,0.01357866699496905
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,15,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,7,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,7,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,7,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,7,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,7,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,15,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,15,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,15,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,15,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,15,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,15,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,15,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,15,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,15,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,31,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,15,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,15,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,15,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,15,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,31,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,31,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,31,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,31,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,31,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,31,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,31,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,31,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,31,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,31,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,31,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,31,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,31,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,63,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,63,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,63,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,63,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,63,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,63,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,63,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,63,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,63,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,63,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,127,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,63,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,63,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,127,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,63,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,63,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,127,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,127,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,127,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,127,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,127,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,127,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,127,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,127,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,127,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,127,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,127,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,255,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,127,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,127,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,127,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,255,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,255,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,255,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,255,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,255,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,255,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,255,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,255,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,255,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,255,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,255,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,255,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,255,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,255,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,511,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,511,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,511,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,511,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,511,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,511,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,511,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,511,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,511,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,511,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,511,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,511,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,511,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,511,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,511,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,1023,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,511,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,1023,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,1023,0.027642667293548584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,1023,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,1023,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,1023,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,1023,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,1023,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,1023,0.023498666783173878
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,1023,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,1023,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,1023,0.013973332941532135
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,1023,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,1023,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,1023,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,1023,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,2047,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,2047,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,2047,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,2047,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,2047,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,2047,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,2047,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,2047,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,2047,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,2047,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,2047,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,2047,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,2047,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,4095,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,2047,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,2047,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,2047,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,4095,0.04222933451334635
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,4095,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,4095,0.05100266635417938
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,4095,0.05439466734727224
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,4095,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,4095,0.04160533348719279
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,4095,0.04221333563327789
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,4095,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,4095,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,4095,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,4095,0.02889599899450938
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,4095,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,4095,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,4095,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,8191,0.06685333450635274
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,4095,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,8191,0.078575998544693
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,8191,0.0763733337322871
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,8191,0.09480533003807068
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,8191,0.0697920024394989
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,8191,0.06797866523265839
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,8191,0.06668266654014587
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,8191,0.06715733309586842
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,8191,0.052570665876070656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,8191,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,8191,0.0516533354918162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,16383,0.1313866674900055
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,8191,0.038319999972979225
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,8191,0.03758399933576584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,8191,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,8191,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,16383,0.11937600374221802
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,8191,0.0366293340921402
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,16383,0.13141333063443503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,16383,0.17177599668502808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,16383,0.12058132886886597
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,16383,0.12282666563987732
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,16383,0.060559997955958046
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,16383,0.11999467015266418
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,16383,0.11854400237401326
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,16383,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,16383,0.07287999987602234
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,16383,0.0867786705493927
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,16383,0.06229333579540253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,16383,0.061610668897628784
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,16383,0.059808000922203064
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,16383,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,32767,0.23635733127593994
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,32767,0.2372373342514038
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,32767,0.3296639919281006
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,32767,0.131632000207901
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,32767,0.2274293303489685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,32767,0.22345600525538126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,32767,0.22195200125376383
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,32767,0.22277865807215372
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,32767,0.2209119995435079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,32767,0.12237333257993062
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,32767,0.15762666861216226
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,32767,0.10776533683141072
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,32767,0.10624000430107117
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,65535,0.6392800013224283
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,32767,0.10594666997591655
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,32767,0.10584533214569092
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,32767,0.10502400000890096
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,65535,0.4505973259607951
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,65535,0.4562026659647624
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,65535,0.4405759970347087
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,65535,0.4317973454793294
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,65535,0.43246932824452716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,65535,0.4350026845932007
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,65535,0.43346134821573895
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,65535,0.22025599082310995
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,65535,0.19664533933003744
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,65535,0.19614400466283163
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,65535,0.23618666330973306
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,131071,0.869488000869751
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,65535,0.30111465851465863
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,65535,0.19884266455968222
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,131071,0.8386399745941162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,65535,0.19641600052515665
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,65535,0.19615467389424643
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,131071,0.8751839796702067
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,131071,1.2627253532409668
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,131071,0.41462934017181396
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,131071,0.8515199820200602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,131071,0.8424479961395264
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,131071,0.3759680191675822
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,131071,0.8345119953155518
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,128,131071,0.8355253537495931
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,131071,0.44747201601664227
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,131071,0.5819040139516195
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,131071,0.37940800189971924
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,131071,0.37582401434580487
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,131071,0.3766080141067505
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,128,131071,0.3755893309911092
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,1,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,1,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,1,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,1,0.014032000054915747
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,1,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,1,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,1,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,1,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,1,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,1,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,1,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,1,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,1,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,3,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,1,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,1,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,1,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,3,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,3,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,3,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,3,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,3,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,3,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,3,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,3,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,3,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,3,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,3,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,3,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,3,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,3,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,3,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,7,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,7,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,7,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,7,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,7,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,7,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,7,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,7,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,7,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,7,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,7,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,7,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,7,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,7,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,7,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,7,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,15,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,15,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,15,0.013301332791646322
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,15,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,15,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,15,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,15,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,15,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,15,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,15,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,15,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,31,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,31,0.013514666507641474
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,15,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,15,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,15,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,15,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,31,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,31,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,31,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,31,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,31,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,31,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,31,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,31,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,31,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,31,0.013552000125249227
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,31,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,31,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,31,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,31,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,63,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,63,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,63,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,63,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,63,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,63,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,63,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,63,0.013418667018413544
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,63,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,63,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,63,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,63,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,63,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,63,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,63,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,127,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,127,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,127,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,127,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,127,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,127,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,127,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,127,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,127,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,127,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,127,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,127,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,127,0.011760000139474869
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,127,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,127,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,255,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,127,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,255,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,255,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,255,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,255,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,255,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,255,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,255,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,255,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,255,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,255,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,255,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,255,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,511,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,255,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,255,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,255,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,511,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,511,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,511,0.019871999820073444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,511,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,511,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,511,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,511,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,511,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,511,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,511,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,511,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,1023,0.029637334247430164
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,511,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,511,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,511,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,511,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,1023,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,1023,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,1023,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,1023,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,1023,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,1023,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,1023,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,1023,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,1023,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,1023,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,2047,0.05193066596984863
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,1023,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,2047,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,2047,0.04249600072701772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,2047,0.04284800092379252
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,2047,0.04185600082079569
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,1023,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,1023,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,1023,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,1023,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,2047,0.045514668027559914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,2047,0.04411733150482178
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,2047,0.04167999823888143
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,2047,0.031136001149813335
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,2047,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,2047,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,4095,0.07231999933719635
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,2047,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,2047,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,2047,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,4095,0.06770666440327962
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,2047,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,2047,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,4095,0.07856533428033192
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,4095,0.09297066926956177
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,4095,0.0694400022427241
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,4095,0.06773866713047028
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,4095,0.06772266825040181
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,4095,0.06738666693369548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,4095,0.048826664686203
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,4095,0.05202133456865946
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,4095,0.04978133241335551
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,4095,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,4095,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,4095,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,4095,0.037045332292715706
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,4095,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,8191,0.1265600025653839
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,8191,0.11990400155385335
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,8191,0.13268799583117166
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,8191,0.07520000139872234
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,8191,0.16955200831095377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,8191,0.08692800005276997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,8191,0.12335999806722005
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,8191,0.11827733119328816
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,8191,0.1188266674677531
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,8191,0.06033066908518473
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,8191,0.11990933616956075
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,8191,0.07635733485221863
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,8191,0.0626986672480901
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,8191,0.061146666606267296
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,8191,0.060458665092786155
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,8191,0.06001066664854685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,16383,0.22275733947753906
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,16383,0.23229332764943442
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,16383,0.22051199277242026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,16383,0.23894933859507242
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,16383,0.32551999886830646
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,16383,0.22788800795873007
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,16383,0.22386133670806885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,16383,0.22356265783309937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,16383,0.12892799576123556
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,16383,0.12610133488972983
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,16383,0.1557813286781311
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,16383,0.10844266414642334
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,16383,0.10705600182215373
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,16383,0.10638399918874104
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,16383,0.10660266876220703
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,16383,0.10674666364987691
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,32767,0.44724265734354657
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,32767,0.4581013520558675
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,32767,0.6330666542053223
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,32767,0.4418133497238159
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,32767,0.43479466438293457
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,32767,0.43251200517018634
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,32767,0.4325386683146159
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,32767,0.4342879851659139
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,32767,0.23457066218058267
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,32767,0.2220053275426229
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,32767,0.1960960030555725
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,32767,0.2978666623433431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,32767,0.20025600989659628
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,65535,1.2527413368225098
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,32767,0.19702933231989542
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,65535,0.8333653608957926
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,32767,0.19637866814931235
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,32767,0.19619733095169067
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,65535,0.870522658030192
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,65535,0.44554134209950763
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,65535,0.8743680318196615
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,65535,0.8537279764811198
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,65535,0.8366719881693522
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,65535,0.8353919982910156
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,65535,0.8352800210316976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,65535,0.4182240168253581
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,65535,0.5795733531316122
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,65535,0.3799999952316284
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,65535,0.3762133518854777
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,65535,0.3761386473973592
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,65535,0.3774079879124959
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,131071,1.6472907066345215
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,65535,0.3752586841583252
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,131071,1.7181973457336426
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,131071,1.6990240414937336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,131071,2.4888426462809243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,131071,1.6757866541544597
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,131071,1.6396160125732422
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,131071,0.7392906347910563
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,131071,1.6397706667582195
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,128,131071,1.648362636566162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,131071,0.8651999632517496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,131071,0.8046879768371582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,131071,1.1437119642893474
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,1,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,128,131071,0.735210657119751
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,131071,0.7337066332499186
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,131071,0.7344746589660645
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,1,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,131071,0.7332266966501871
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,1,0.019434666881958645
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,1,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,1,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,1,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,1,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,1,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,1,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,1,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,1,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,1,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,3,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,1,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,1,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,1,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,1,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,3,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,3,0.020682666450738907
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,3,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,3,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,3,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,3,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,3,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,3,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,3,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,3,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,3,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,3,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,3,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,7,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,3,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,7,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,3,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,7,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,7,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,7,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,7,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,7,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,7,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,7,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,7,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,7,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,7,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,7,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,7,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,7,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,7,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,15,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,15,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,15,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,15,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,15,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,15,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,15,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,15,0.014490666488806406
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,15,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,15,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,15,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,15,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,15,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,15,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,15,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,31,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,15,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,31,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,31,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,31,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,31,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,31,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,31,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,31,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,31,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,31,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,31,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,31,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,31,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,31,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,31,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,31,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,63,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,63,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,63,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,63,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,63,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,63,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,63,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,63,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,63,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,63,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,63,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,63,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,63,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,127,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,63,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,63,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,63,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,127,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,127,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,127,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,127,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,127,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,127,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,127,0.014490666488806406
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,127,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,127,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,255,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,127,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,127,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,127,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,127,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,255,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,127,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,127,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,255,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,255,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,255,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,255,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,255,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,255,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,255,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,255,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,255,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,511,0.03271999955177307
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,255,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,255,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,255,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,255,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,255,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,511,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,511,0.024469333390394848
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,511,0.03186133255561193
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,511,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,511,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,511,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,511,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,511,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,511,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,511,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,511,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,511,0.014469332993030548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,511,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,511,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,511,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,1023,0.04985600213209788
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,1023,0.04762666424115499
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,1023,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,1023,0.04308266441027323
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,1023,0.041178666055202484
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,1023,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,1023,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,1023,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,1023,0.04196799794832865
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,1023,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,1023,0.033029332756996155
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,1023,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,1023,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,1023,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,1023,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,1023,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,2047,0.07806933422883351
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,2047,0.07481599847475688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,2047,0.09390399853388469
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,2047,0.07030400137106578
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,2047,0.06798399984836578
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,2047,0.06673599779605865
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,2047,0.038165333370367684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,2047,0.06749333441257477
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,2047,0.06764799853165944
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,2047,0.05788800120353699
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,2047,0.047872001926104225
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,2047,0.05275199810663859
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,4095,0.1727893352508545
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,2047,0.037402667105197906
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,2047,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,2047,0.0367253323396047
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,2047,0.03600533306598663
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,4095,0.12052800258000691
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,4095,0.08513066172599792
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,4095,0.1320746640364329
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,4095,0.127509335676829
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,4095,0.12060266733169556
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,4095,0.12035199999809265
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,4095,0.11963199575742085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,4095,0.1200320025285085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,4095,0.07217066486676534
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,4095,0.08922132849693298
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,4095,0.06233066817124685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,8191,0.32682667175928753
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,4095,0.06089066465695699
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,8191,0.2261120080947876
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,4095,0.059818665186564125
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,4095,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,4095,0.05989866455396017
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,8191,0.23843199014663696
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,8191,0.23470399777094522
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,8191,0.22477332750956217
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,8191,0.2248106598854065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,8191,0.2237280011177063
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,8191,0.22195732593536377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,8191,0.13854400316874185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,8191,0.12229333321253459
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,8191,0.10497066378593445
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,16383,0.4527253309885661
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,8191,0.16034666697184244
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,8191,0.10779199997584026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,8191,0.10777599612871806
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,8191,0.10566932956377666
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,8191,0.10532800356547038
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,16383,0.4530986547470093
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,16383,0.6393599907557169
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,16383,0.43902401129404706
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,16383,0.30161599318186444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,16383,0.4359360138575236
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,16383,0.43591467539469403
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,16383,0.4373386700948079
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,16383,0.4350026845932007
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,16383,0.24420267343521118
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,16383,0.2199839949607849
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,16383,0.19885333379109701
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,16383,0.19739733139673868
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,16383,0.1959893306096395
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,16383,0.1961066722869873
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,16383,0.19620800018310547
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,32767,0.8801866372426351
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,32767,0.8658133347829183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,32767,0.8391573429107666
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,32767,1.2958079973856609
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,32767,0.8449599742889404
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,32767,0.839354674021403
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,32767,0.8379253546396891
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,32767,0.8400853474934896
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,32767,0.45431466897328693
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,32767,0.41486398379007977
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,32767,0.5823893149693807
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,32767,0.37808001041412354
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,32767,0.3765973250071208
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,32767,0.3819146553675334
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,65535,2.608799934387207
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,65535,1.6630132993062336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,65535,1.6553866068522136
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,65535,1.6515679359436035
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,32767,0.3776906728744507
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,32767,0.3765973250071208
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,65535,1.7327680587768555
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,65535,1.6924266815185547
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,65535,1.6516532897949219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,65535,1.6536800066630046
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,65535,0.803178628285726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,65535,0.8807360331217448
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,65535,1.1683093706766765
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,65535,0.7385439872741699
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,65535,0.7382506529490153
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,65535,0.7362133661905924
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,65535,0.7350666522979736
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,65535,0.7328266302744547
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,131071,3.4357067743937173
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,131071,3.3633225758870444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,131071,5.2237545649210615
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,131071,3.295999844868978
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,131071,3.2886025110880532
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,131071,3.2856054306030273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,131071,3.278042793273926
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,128,131071,3.290992101033529
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,131071,1.722117265065511
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,131071,1.4508426984151204
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,131071,1.4531839688618977
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,131071,1.5759572982788086
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,131071,2.370757261912028
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,131071,1.4527734120686848
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,131071,1.450218677520752
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,1,0.03014400104681651
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,1,0.02237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,1,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,1,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,128,131071,1.449295997619629
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,1,0.02826666583617528
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,1,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,1,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,1,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,1,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,1,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,1,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,1,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,1,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,1,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,3,0.020448000480731327
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,1,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,1,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,3,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,3,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,3,0.029605334003766377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,3,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,3,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,3,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,3,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,3,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,3,0.04372266431649526
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,3,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,3,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,3,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,7,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,3,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,3,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,3,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,7,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,7,0.02997333308060964
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,7,0.0220320001244545
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,7,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,7,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,7,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,7,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,7,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,7,0.04419733087221781
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,7,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,7,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,7,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,7,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,7,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,7,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,15,0.02980799973011017
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,15,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,15,0.029152000943819683
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,15,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,15,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,15,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,15,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,15,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,15,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,15,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,15,0.04389866689840952
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,15,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,31,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,15,0.025834667185942333
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,31,0.020069333414236706
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,15,0.017946666727463405
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,15,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,15,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,31,0.04373333354791006
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,31,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,31,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,31,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,31,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,31,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,31,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,31,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,31,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,31,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,31,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,31,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,31,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,63,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,31,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,63,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,63,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,63,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,63,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,63,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,63,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,63,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,63,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,63,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,63,0.04353600243727366
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,63,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,63,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,63,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,63,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,63,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,127,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,127,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,127,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,127,0.020373333245515823
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,127,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,127,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,127,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,127,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,127,0.04413333535194397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,127,0.026517334083716076
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,127,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,127,0.026821332673231762
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,127,0.018325333793958027
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,255,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,127,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,127,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,127,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,255,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,255,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,255,0.05012266834576925
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,255,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,255,0.02163733293612798
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,255,0.01971199984351794
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,255,0.016186666985352833
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,255,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,255,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,255,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,255,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,255,0.026906666656335194
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,255,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,511,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,511,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,255,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,255,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,511,0.05753066639105479
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,511,0.05133333305517832
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,511,0.058186665177345276
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,511,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,511,0.04148799926042557
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,511,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,511,0.04001066585381826
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,511,0.057904000083605446
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,511,0.04165333261092504
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,511,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,511,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,511,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,511,0.020010666300853092
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,511,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,1023,0.06771733363469441
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,1023,0.08450133601824443
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,1023,0.07969066500663757
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,1023,0.09597333272298177
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,1023,0.05792533357938131
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,1023,0.07044266661008199
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,1023,0.06805333495140076
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,1023,0.06651733318964641
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,1023,0.06762666503588359
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,1023,0.07222400108973186
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,1023,0.05587733288606008
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,2047,0.13219733039538065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,1023,0.04141333450873693
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,1023,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,1023,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,1023,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,1023,0.03603200117746989
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,2047,0.1405280033747355
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,2047,0.17695999145507812
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,2047,0.12308266758918762
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,2047,0.12062399586041768
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,2047,0.11961066722869873
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,2047,0.1202186644077301
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,2047,0.12000000476837158
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,2047,0.09921600421269734
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,2047,0.08269333342711131
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,2047,0.09477866689364116
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,2047,0.06597866614659627
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,2047,0.0626933326323827
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,2047,0.06195199986298879
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,2047,0.06048533320426941
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,2047,0.06090133388837179
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,4095,0.24741866191228232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,4095,0.2408906618754069
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,4095,0.3288000027338664
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,4095,0.22859734296798706
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,4095,0.1323306659857432
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,4095,0.22579733530680338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,4095,0.1127893328666687
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,4095,0.22474133968353271
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,4095,0.22537599007288614
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,4095,0.22446932395299277
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,4095,0.15475199619928995
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,4095,0.16616533199946085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,4095,0.10884267091751099
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,4095,0.10737066467603047
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,4095,0.10735467076301575
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,4095,0.10598933696746826
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,8191,0.4344746669133504
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,8191,0.46010132630666095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,8191,0.4562506675720215
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,8191,0.6345920165379842
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,8191,0.4384053150812785
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,8191,0.4341706832249959
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,8191,0.43434667587280273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,8191,0.432645320892334
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,8191,0.199562668800354
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,8191,0.26129599412282306
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,8191,0.23188267151514688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,8191,0.30610666672388714
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,8191,0.20355733235677084
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,8191,0.19975467522939047
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,8191,0.1984106699625651
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,8191,0.19774399201075235
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,16383,0.861466646194458
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,16383,0.856655995051066
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,16383,0.8924533526102701
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,16383,0.8978292942047119
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,16383,1.2791519959767659
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,16383,0.8631733258565267
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,16383,0.8539253075917562
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,16383,0.3807946840922038
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,16383,0.3787413438161214
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,16383,0.3784533341725667
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,16383,0.8571146329243978
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,16383,0.47362132867177326
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,16383,0.4260480006535848
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,16383,0.5848160187403361
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,16383,0.3829493522644043
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,16383,0.3781866629918416
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,32767,1.7451732953389485
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,32767,1.73963197072347
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,32767,2.6558826764424643
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,32767,1.692346731821696
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,32767,1.6857013702392578
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,32767,1.6813599268595378
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,32767,1.6867733001708984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,32767,1.679632027943929
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,32767,0.8993066946665446
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,32767,0.8177546660105387
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,32767,1.1476960182189941
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,32767,0.7440373102823893
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,32767,0.7432746887207031
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,32767,0.7413226763407389
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,32767,0.7385760148366293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,32767,0.7400213082631429
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,65535,3.4624694188435874
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,65535,3.3218774795532227
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,65535,3.421519915262858
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,65535,5.378447850545247
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,65535,3.3554134368896484
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,65535,3.310474713643392
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,65535,2.3671840031941733
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,65535,3.3254400889078775
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,128,65535,3.311744054158529
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,65535,1.7482399940490723
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,65535,1.5980374018351238
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,65535,1.469871997833252
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,65535,1.4624959627787273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,65535,1.4656480153401692
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,1,0.05064533154169718
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,65535,1.464863936106364
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,1,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,128,65535,1.461957295735677
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,1,0.04717866579691569
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,1,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,1,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,1,0.02754666656255722
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,1,0.02775999903678894
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,1,0.028021333118279774
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,1,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,1,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,1,0.04785599807898203
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,1,0.04318933188915253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,1,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,1,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,3,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,1,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,1,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,3,0.05083733300367991
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,3,0.03794133414824804
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,3,0.046757335464159645
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,3,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,3,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,3,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,3,0.027722666660944622
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,3,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,3,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,3,0.0782773345708847
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,3,0.04788800080617269
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,3,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,3,0.026394667724768322
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,3,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,3,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,7,0.05072000126043955
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,7,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,7,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,7,0.047269334395726524
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,7,0.02959999938805898
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,7,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,7,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,7,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,7,0.07887466748555501
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,7,0.04282666742801666
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,15,0.04974933465321859
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,7,0.04788800080617269
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,7,0.026799999177455902
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,7,0.02235200007756551
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,15,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,7,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,7,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,15,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,7,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,15,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,15,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,15,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,15,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,15,0.028090665737787884
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,15,0.07826666533946991
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,15,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,15,0.04248533149560293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,15,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,15,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,15,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,15,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,15,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,31,0.051029334465662636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,31,0.03741333385308584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,31,0.04799999793370565
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,31,0.029125332832336426
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,31,0.027989332874615986
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,31,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,31,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,31,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,31,0.07827199995517731
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,31,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,31,0.04331199824810028
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,31,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,31,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,31,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,31,0.021482666333516438
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,31,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,63,0.05052266518274943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,63,0.037605332831541695
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,63,0.04807466765244802
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,63,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,63,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,63,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,63,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,63,0.027642667293548584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,63,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,63,0.07850133379300435
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,63,0.048245335618654885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,63,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,63,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,63,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,63,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,63,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,127,0.05306666592756907
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,127,0.04340266684691111
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,127,0.05207466582457224
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,127,0.032629333436489105
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,127,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,127,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,127,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,127,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,127,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,127,0.08007466793060303
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,127,0.04960533479849497
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,127,0.045514668027559914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,127,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,127,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,127,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,127,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,255,0.06810133159160614
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,255,0.058143998185793556
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,255,0.057146668434143066
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,255,0.08604799707730611
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,255,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,255,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,255,0.04279999931653341
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,255,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,255,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,255,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,255,0.04080000023047129
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,255,0.04164800047874451
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,255,0.05823466678460439
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,255,0.04710933566093445
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,255,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,255,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,511,0.09289600451787312
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,511,0.0862613320350647
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,511,0.09303999940554301
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,511,0.07417066891988118
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,511,0.07212799787521362
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,511,0.0687253326177597
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,511,0.07048533360163371
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,511,0.06995200117429097
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,511,0.09922666351000468
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,511,0.07009600102901459
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,511,0.06414400041103363
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,511,0.04584533472855886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,1023,0.16598400473594666
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,511,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,511,0.038005332152048744
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,511,0.038959999879201256
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,511,0.038693333665529885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,1023,0.14593600233395895
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,1023,0.14250133434931436
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,1023,0.1260533332824707
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,1023,0.12086932857831319
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,1023,0.12226133545239766
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,1023,0.06397333244482677
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,1023,0.11995200316111247
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,1023,0.11987732847531636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,1023,0.062021334966023765
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,1023,0.12463999787966411
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,1023,0.09637332955996196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,1023,0.09814932942390442
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,1023,0.07049066821734111
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,1023,0.06302933394908905
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,1023,0.06079466640949249
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,2047,0.24699733654658
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,2047,0.2503733237584432
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,2047,0.3046826720237732
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,2047,0.17529600858688354
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,2047,0.23124800125757852
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,2047,0.22567999362945557
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,2047,0.22408533096313477
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,2047,0.2244159976641337
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,2047,0.22233066956202188
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,2047,0.1483786702156067
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,2047,0.164383997519811
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,2047,0.11776533722877502
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,2047,0.11183466513951619
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,2047,0.10938666264216106
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,2047,0.10867733756701152
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,2047,0.10762133200963338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,4095,0.44008533159891766
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,4095,0.434224009513855
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,4095,0.4317813316980998
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,4095,0.47119998931884766
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,4095,0.5798399845759074
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,4095,0.44145600001017254
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,4095,0.4339040120442708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,4095,0.4318559964497884
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,4095,0.27503466606140137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,4095,0.24920000632603964
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,4095,0.2916106581687927
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,4095,0.21003733078638712
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,4095,0.20442666610081991
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,4095,0.20204800367355347
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,4095,0.20040533939997354
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,4095,0.19941333929697672
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,8191,0.8284320036570231
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,8191,0.9071306387583414
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,8191,1.1319786707560222
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,8191,0.4673279921213786
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,8191,0.8679040273030599
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,8191,0.8586453596750895
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,8191,0.8579893112182617
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,8191,0.8527466456095377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,8191,0.8538133303324381
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,8191,0.44733866055806476
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,8191,0.5394773483276367
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,8191,0.3936320145924886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,8191,0.3862080176671346
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,8191,0.38521067301432294
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,8191,0.3827253182729085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,8191,0.3827253182729085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,16383,1.6138453483581543
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,16383,1.7152585983276367
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,16383,1.8077227274576824
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,16383,2.361658732096354
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,16383,1.7410346666971843
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,16383,1.7184373537699382
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,16383,1.7217973073323567
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,16383,1.7184213002522786
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,16383,0.7549973328908285
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,16383,0.8506133556365967
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,16383,0.8417066733042399
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,16383,1.041210651397705
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,16383,0.7608426411946615
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,16383,0.7563199996948242
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,32767,3.531087875366211
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,16383,0.7502133051554362
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,16383,0.7496053377787272
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,32767,3.182250658671061
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,32767,4.962490717569987
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,32767,3.405322710673014
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,128,32767,3.3630240758260093
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,32767,3.373258590698242
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,32767,1.6181813875834148
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,32767,1.6316852569580078
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,32767,1.4947093327840169
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,32767,3.368501345316569
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,32767,3.3862826029459634
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,1,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,32767,1.4818719228108723
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,128,32767,1.481946627298991
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,32767,2.0948425928751626
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,32767,1.4860639572143555
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,32767,1.4858400026957195
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,1,0.06170133252938589
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,1,0.08320533235867818
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,1,0.05204799771308899
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,1,0.04549333453178406
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,1,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,1,0.04413333535194397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,1,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,1,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,1,0.14574399590492249
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,1,0.0846026639143626
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,1,0.0790880024433136
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,3,0.08588799834251404
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,1,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,1,0.03238933285077413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,3,0.04541866481304169
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,1,0.0316746657093366
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,1,0.032298666735490165
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,3,0.08683733145395915
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,3,0.060533334811528526
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,3,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,3,0.04378133515516917
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,3,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,3,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,3,0.044213334719340004
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,3,0.14563199877738953
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,3,0.07993599772453308
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,3,0.08480532964070638
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,3,0.03387200087308884
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,3,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,3,0.03198933353026708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,7,0.08649067083994548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,7,0.04364266494909922
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,3,0.031914666295051575
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,7,0.06118399898211161
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,7,0.08476799726486206
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,7,0.051829333106676735
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,7,0.045893331368764244
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,7,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,7,0.044106667240460716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,7,0.145797332127889
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,7,0.08457066615422566
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,7,0.08044266700744629
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,7,0.04473066826661428
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,7,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,15,0.052095999320348106
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,7,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,7,0.03183999905983607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,7,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,15,0.08646399776140849
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,15,0.08634133140246074
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,15,0.06136533121267954
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,15,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,15,0.04496000210444132
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,15,0.04538666705290476
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,15,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,15,0.0843999981880188
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,15,0.14588266611099243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,15,0.08004266520341237
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,31,0.06165333092212677
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,15,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,15,0.03495999922355016
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,15,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,15,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,15,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,31,0.08673066894213359
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,31,0.08660266796747844
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,31,0.05169600248336792
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,31,0.045968001087506614
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,31,0.044079999128977455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,31,0.04397333165009817
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,31,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,31,0.145989328622818
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,31,0.08413867155710857
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,31,0.08083733419577281
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,31,0.04468800127506256
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,31,0.03572266548871994
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,31,0.03356799980004629
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,31,0.032245332996050514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,63,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,31,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,63,0.08693333466847737
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,63,0.06033066908518473
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,63,0.0865760048230489
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,63,0.052282666166623436
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,63,0.0459146648645401
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,63,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,63,0.03507733345031738
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,63,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,63,0.14623467127482095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,63,0.08469866712888081
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,127,0.0701279987891515
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,63,0.08036266764005025
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,63,0.04437333345413208
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,63,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,63,0.03193599979082743
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,63,0.03193599979082743
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,127,0.05460800230503082
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,127,0.0904266635576884
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,127,0.0883786678314209
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,127,0.060085331400235496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,127,0.054245332876841225
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,127,0.053743998209635414
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,127,0.05398933092753092
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,127,0.1452906628449758
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,127,0.08538132905960083
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,127,0.03321066747109095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,255,0.11529599626859029
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,127,0.08062399923801422
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,127,0.04786133269468943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,255,0.06676800052324931
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,127,0.036176001032193504
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,127,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,127,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,255,0.09737066427866618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,255,0.10062932968139648
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,255,0.0986346701780955
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,255,0.07316799958546956
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,255,0.06636266907056172
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,255,0.06562666594982147
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,255,0.06805333495140076
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,255,0.15548266967137656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,255,0.08247999846935272
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,255,0.052789335449536644
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,255,0.042805333932240806
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,511,0.17155200242996216
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,255,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,255,0.038917332887649536
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,255,0.03873066604137421
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,511,0.16874132553736368
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,511,0.14596800009409586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,511,0.12390933434168498
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,511,0.1204746663570404
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,511,0.11954133709271748
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,511,0.11888532837231953
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,511,0.11556800206502278
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,511,0.18175999323527017
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,511,0.1207413375377655
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,511,0.11760000387827556
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,511,0.07609599828720093
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,1023,0.3147413333257039
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,511,0.06514133512973785
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,511,0.06229333579540253
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,511,0.06203199923038483
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,511,0.06169066826502482
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,1023,0.21253865957260132
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,1023,0.27314666906992596
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,1023,0.25097066164016724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,1023,0.16888533035914102
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,1023,0.18252267440160116
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,1023,0.22139199574788412
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,1023,0.21335999170939127
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,1023,0.212442676226298
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,1023,0.2101866602897644
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,1023,0.23432532946268717
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,1023,0.12012799580891927
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,1023,0.10943466424942017
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,1023,0.1053706705570221
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,1023,0.1046453317006429
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,1023,0.10553066929181416
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,2047,0.47388799985249835
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,2047,0.45230400562286377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,2047,0.5899466673533121
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,2047,0.4185440142949422
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,2047,0.4100373188654582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,2047,0.310805340607961
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,2047,0.4076053301493327
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,2047,0.4077226718266805
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,2047,0.40485866864522296
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,2047,0.2648640076319377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,2047,0.33389333883921307
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,2047,0.20966933170954385
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,2047,0.1978879968325297
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,2047,0.19402132431666055
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,2047,0.1920693318049113
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,2047,0.19112000862757364
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,4095,0.8623999754587809
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,4095,0.8564213116963705
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,4095,1.1312479972839355
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,4095,0.8171093463897705
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,4095,0.8042773405710856
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,4095,0.8038400014241537
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,4095,0.3813120126724243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,4095,0.8007840315500895
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,4095,0.8055626551310221
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,4095,0.36322665214538574
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,4095,0.5335733493169149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,4095,0.44790399074554443
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,4095,0.5639146566390991
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,4095,0.370746652285258
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,4095,0.3654239972432454
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,4095,0.363103985786438
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,8191,1.681290626525879
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,8191,1.610640048980713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,8191,1.6427253087361653
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,8191,2.247546672821045
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,8191,1.6337332725524902
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,8191,1.061733325322469
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,8191,1.6105066935221355
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,8191,1.6031467119852703
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,8191,0.7076533635457357
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,8191,1.6017279624938965
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,8191,0.9175893465677897
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,8191,0.8118879795074463
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,8191,0.7235146363576254
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,8191,0.7119519710540771
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,8191,0.7074453035990397
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,8191,0.7058719793955485
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,16383,3.210480054219564
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,16383,3.4062134424845376
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,16383,4.738095919291179
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,16383,3.307248115539551
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,16383,3.238666534423828
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,16383,3.249786694844564
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,16383,3.2287467320760093
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,128,16383,3.2440694173177085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,16383,1.6817760467529297
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,16383,1.5413333574930828
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,16383,1.4110399881998699
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,16383,2.0589332580566406
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,16383,1.397264003753662
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,16383,1.3941814104715984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,16383,1.3928160667419434
TRTLLM,1.2.0rc5,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,128,16383,1.3904852867126465
