framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,1,0.01097600037852923
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,1,0.011429333438475927
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,1,0.01109333336353302
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,1,0.01191466674208641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,1,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,1,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,3,0.011231999844312668
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,3,0.011098666737476984
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,3,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,3,0.01109333336353302
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,3,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,3,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,3,0.011519999553759893
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,3,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,3,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,3,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,3,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,7,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,7,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,7,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,7,0.01267733300725619
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,7,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,15,0.013397333522637686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,15,0.0143306665122509
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,15,0.04287466903527578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,31,0.012282667060693106
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,31,0.013493333011865616
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,31,0.014394666999578476
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,31,0.011589333415031433
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,31,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,31,0.01314666618903478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,63,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,63,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,63,0.011930666863918304
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,63,0.01232533281048139
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,63,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,63,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,63,0.012389333297808966
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,63,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,127,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,127,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,127,0.012282667060693106
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,127,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,127,0.01157333329319954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,127,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,127,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,127,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,127,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,255,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,255,0.011616000284751257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,255,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,255,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,255,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,255,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,255,0.01109333336353302
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,255,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,255,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,511,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,511,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,511,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,511,0.014645333091417948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,511,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,511,0.013178666432698568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,511,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,511,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,511,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,511,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,1023,0.014730667074521383
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,1023,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,1023,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,511,0.012831999609867731
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,1023,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,1023,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,1023,0.012847999731699625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,1023,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,1023,0.012890666723251343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,1023,0.015706667055686314
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,1023,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,1023,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,1023,0.014560000350077948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,1023,0.014544000228246054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,2047,0.014501333236694336
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,2047,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,2047,0.014917333920796713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,2047,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,4095,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,4095,0.02757866680622101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,4095,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,4095,0.022117334107557934
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,4095,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,4095,0.019178666174411774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,4095,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,4095,0.016021333634853363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,8191,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,8191,0.028410665690898895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,8191,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,8191,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,8191,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,8191,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,8191,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,8191,0.020799999435742695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,16383,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,16383,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,16383,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,16383,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,16383,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,16383,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,16383,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,16383,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,16383,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,16383,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,32767,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,32767,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,32767,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,32767,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,32767,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,32767,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,32767,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,32767,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,32767,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,32767,0.03484266748030981
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,65535,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,65535,0.04711466530958811
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,32767,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,65535,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,65535,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,65535,0.04197866717974345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,65535,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,65535,0.042677332957585655
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,65535,0.050853331883748375
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,65535,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,65535,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,65535,0.04301333427429199
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,65535,0.04197866717974345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,65535,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,65535,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,2,131071,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,1,131071,0.06724800169467926
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,4,131071,0.06483200192451477
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,8,131071,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,16,131071,0.06180266539255778
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,32,131071,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,1,131071,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1,1,64,131071,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,4,131071,0.0641653339068095
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,8,131071,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,2,131071,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,32,131071,0.06214933097362518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,16,131071,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1,1,64,131071,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,1,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,1,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,1,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,1,0.022277332842350006
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,1,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,1,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,1,0.012938667088747025
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,3,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,3,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,3,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,3,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,3,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,3,0.012896000097195307
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,3,0.013503999759753546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,3,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,7,0.012826666235923767
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,3,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,7,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,7,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,7,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,7,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,15,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,15,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,15,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,15,0.012538666526476542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,15,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,31,0.020197333147128422
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,31,0.011941333611806234
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,31,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,31,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,31,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,31,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,63,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,63,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,63,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,63,0.011509332805871964
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,63,0.011359999577204386
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,63,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,63,0.011557333171367645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,63,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,63,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,127,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,127,0.01250133290886879
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,127,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,127,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,127,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,127,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,127,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,127,0.011567999919255575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,127,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,127,0.011717333147923151
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,127,0.011274666835864386
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,255,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,255,0.012543999900420507
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,255,0.011333333949247995
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,255,0.014303999642531076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,255,0.03071466585000356
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,255,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,255,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,255,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,255,0.012543999900420507
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,255,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,255,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,511,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,511,0.012725333372751871
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,511,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,511,0.012821332861979803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,511,0.013989333063364029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,511,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,1023,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,1023,0.012671999633312225
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,1023,0.012602667013804117
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,1023,0.012826666235923767
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,1023,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,2047,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,2047,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,2047,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,2047,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,2047,0.012890666723251343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,2047,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,2047,0.012847999731699625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,2047,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,4095,0.017727999637524288
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,4095,0.01708799973130226
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,4095,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,4095,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,4095,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,4095,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,4095,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,8191,0.027973333994547527
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,8191,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,8191,0.023183998962243397
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,8191,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,8191,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,8191,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,8191,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,16383,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,16383,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,16383,0.045050665736198425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,16383,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,16383,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,16383,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,16383,0.04131199916203817
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,16383,0.0365226666132609
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,16383,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,16383,0.045066664616266884
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,16383,0.046426668763160706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,16383,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,16383,0.043338666359583534
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,16383,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,32767,0.04333333174387614
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,32767,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,32767,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,32767,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,32767,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,32767,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,32767,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,32767,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,32767,0.04506133496761322
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,32767,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,32767,0.0529120018084844
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,32767,0.05492266515890757
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,32767,0.05189333359400431
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,65535,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,65535,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,65535,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,65535,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,65535,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,65535,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,65535,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,65535,0.08124800026416779
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,65535,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,65535,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,65535,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,65535,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,65535,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,1,131071,0.0918239951133728
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,65535,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,8,131071,0.10237866640090942
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,16,131071,0.10615467031796773
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,2,131071,0.10342400272687276
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,4,131071,0.09114133318265279
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,1,131071,0.09147733449935913
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,2,1,64,131071,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,32,131071,0.10718933741251628
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,2,131071,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,4,131071,0.09185066819190979
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,32,131071,0.10543466607729594
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,2,1,64,131071,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,8,131071,0.10240532954533894
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,16,131071,0.10444800059000652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,1,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,1,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,1,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,1,0.011242666592200598
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,1,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,1,0.01097600037852923
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,1,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,1,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,1,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,1,0.011482667177915573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,1,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,3,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,3,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,3,0.011098666737476984
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,3,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,3,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,3,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,3,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,7,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,7,0.012538666526476542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,7,0.011221333096424738
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,7,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,7,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,7,0.011978667229413986
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,15,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,15,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,15,0.011834666132926941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,15,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,31,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,31,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,31,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,31,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,31,0.011941333611806234
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,31,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,63,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,63,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,63,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,63,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,63,0.012752000242471695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,127,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,127,0.011589333415031433
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,127,0.011285333583752314
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,127,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,127,0.012144000579913458
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,127,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,127,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,127,0.011941333611806234
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,255,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,255,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,255,0.012538666526476542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,255,0.010735999792814255
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,255,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,255,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,255,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,255,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,255,0.011957333733638128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,255,0.012538666526476542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,255,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,511,0.014645333091417948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,511,0.012815999488035837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,511,0.021130666136741638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,511,0.013658666362365087
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,511,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,1023,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,1023,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,1023,0.013317332913478216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,1023,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,1023,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,1023,0.03159466634194056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,1023,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,1023,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,1023,0.012842666357755661
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,2047,0.017375999440749485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,2047,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,2047,0.01637866720557213
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,2047,0.019754666835069656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,2047,0.03365333378314972
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,2047,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,2047,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,4095,0.026288000245889027
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,4095,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,4095,0.018394666413466137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,4095,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,4095,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,8191,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,8191,0.029701332251230877
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,8191,0.027989332874615986
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,8191,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,4095,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,8191,0.03107733279466629
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,8191,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,8191,0.03448000053564707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,8191,0.028666667640209198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,8191,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,8191,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,8191,0.031045332551002502
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,16383,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,16383,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,16383,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,16383,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,16383,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,16383,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,16383,0.037178667883078255
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,16383,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,16383,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,16383,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,16383,0.03789333254098892
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,16383,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,16383,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,32767,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,32767,0.06554666658242543
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,32767,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,32767,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,32767,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,32767,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,32767,0.062133332093556724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,32767,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,32767,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,32767,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,32767,0.057664001981417336
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,32767,0.06246933341026306
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,32767,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,65535,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,32767,0.06108266611893972
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,65535,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,65535,0.08532800277074178
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,65535,0.08499200145403545
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,65535,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,65535,0.08566932876904805
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,65535,0.08295466502507527
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,65535,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,65535,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,65535,0.10240532954533894
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,65535,0.08499200145403545
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,1,131071,0.13619200388590494
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,65535,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,65535,0.08738133311271667
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,65535,0.08567466338475545
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,2,131071,0.17272533973058066
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,4,131071,0.13431466619173685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,8,131071,0.13772799571355185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,16,131071,0.13807466626167297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,32,131071,0.13961066802342734
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,4,1,64,131071,0.13926399747530618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,2,131071,0.1718613306681315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,1,131071,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,4,131071,0.13448533415794373
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,4,1,64,131071,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,32,131071,0.14029332995414734
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,8,131071,0.13704533378283182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,16,131071,0.13775466879208884
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,1,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,1,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,1,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,1,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,1,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,1,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,1,0.012538666526476542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,3,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,3,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,3,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,3,0.013429333766301474
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,3,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,3,0.011477333803971609
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,3,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,3,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,3,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,3,0.012778667112191519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,7,0.012885333349307379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,3,0.01192533348997434
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,7,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,7,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,7,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,7,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,7,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,7,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,7,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,7,0.011621333658695221
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,15,0.012597333639860153
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,15,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,15,0.013770667215188345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,15,0.01191466674208641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,15,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,15,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,15,0.012298667182525
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,31,0.013440000514189402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,31,0.0124746672809124
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,31,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,31,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,31,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,31,0.011141333729028702
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,31,0.010746666540702185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,31,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,63,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,63,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,63,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,63,0.010757333288590113
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,63,0.011952000359694162
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,63,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,63,0.012896000097195307
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,63,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,63,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,127,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,127,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,127,0.025290665527184803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,127,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,127,0.012757333616415659
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,127,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,127,0.012725333372751871
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,255,0.012944000462690989
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,255,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,255,0.011247999966144562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,255,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,255,0.012666666259368261
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,255,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,255,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,255,0.012506666282812754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,255,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,511,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,511,0.016634666671355564
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,255,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,511,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,511,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,1023,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,1023,0.021568000316619873
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,1023,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,1023,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,1023,0.01691199963291486
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,1023,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,1023,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,1023,0.014335999886194864
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,2047,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,2047,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,2047,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,2047,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,2047,0.025626666843891144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,2047,0.020074666788180668
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,4095,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,4095,0.01945066700379054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,4095,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,4095,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,4095,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,4095,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,4095,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,4095,0.01945066700379054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,4095,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,4095,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,4095,0.022175999979178112
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,8191,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,8191,0.03857066730658213
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,8191,0.02898666759332021
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,8191,0.031173333525657654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,8191,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,8191,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,8191,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,8191,0.02865600089232127
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,8191,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,8191,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,8191,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,8191,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,16383,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,16383,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,16383,0.05220800141493479
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,16383,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,16383,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,16383,0.057999998331069946
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,16383,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,16383,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,16383,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,16383,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,16383,0.05324266850948334
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,16383,0.052229334910710655
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,16383,0.05256533126036326
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,16383,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,32767,0.08808533350626628
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,32767,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,32767,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,32767,0.10036800305048625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,32767,0.08157333234945933
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,32767,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,32767,0.10069866975148518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,32767,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,32767,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,32767,0.08227733274300893
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,32767,0.08157333234945933
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,32767,0.0795253316561381
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,32767,0.08191466828187306
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,32767,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,65535,0.17083734273910522
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,65535,0.13554666439692178
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,65535,0.13192533453305563
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,65535,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,65535,0.1322719951470693
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,65535,0.13210133711496988
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,65535,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,65535,0.13516799608866373
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,65535,0.1693013310432434
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,65535,0.13210133711496988
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,65535,0.13243732849756876
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,65535,0.13141333063443503
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,65535,0.13210133711496988
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,65535,0.13226667046546936
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,1,131071,0.24644267559051514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,2,131071,0.31249066193898517
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,4,131071,0.23176532983779907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,8,131071,0.23347200949986777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,16,131071,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,8,1,64,131071,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,32,131071,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,1,131071,0.24576000372568765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,2,131071,0.31248533725738525
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,4,131071,0.23245867093404135
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,8,131071,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,16,131071,0.23346134026845297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,32,131071,0.23041599988937378
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,1,0.023082666099071503
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,8,1,64,131071,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,1,0.03908266623814901
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,1,0.011519999553759893
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,1,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,1,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,1,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,1,0.012495999534924826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,3,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,3,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,3,0.02027733375628789
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,3,0.015664000064134598
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,3,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,3,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,7,0.01321600005030632
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,7,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,7,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,7,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,7,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,7,0.013712000101804733
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,7,0.012661332885424295
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,7,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,15,0.011610666910807291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,7,0.01850133389234543
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,15,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,15,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,15,0.012538666526476542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,15,0.012543999900420507
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,15,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,15,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,15,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,31,0.012821332861979803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,31,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,31,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,31,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,31,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,31,0.013354666531085968
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,31,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,31,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,31,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,31,0.010890666395425797
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,63,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,63,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,63,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,63,0.011952000359694162
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,63,0.013338666409254074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,63,0.012666666259368261
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,63,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,63,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,63,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,127,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,127,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,127,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,127,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,127,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,127,0.013189333180586496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,127,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,255,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,255,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,255,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,511,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,511,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,255,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,255,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,511,0.013594667116800943
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,511,0.012831999609867731
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,511,0.0143306665122509
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,511,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,511,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,511,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,511,0.014890667051076889
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,511,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,511,0.012725333372751871
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,1023,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,1023,0.014650666465361914
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,1023,0.015754666179418564
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,1023,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,1023,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,1023,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,1023,0.014901333798964819
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,1023,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,1023,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,2047,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,2047,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,2047,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,2047,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,4095,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,4095,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,2047,0.017743999759356182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,4095,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,4095,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,4095,0.029898665845394135
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,4095,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,4095,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,4095,0.036730666955312095
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,4095,0.034143999218940735
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,4095,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,4095,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,4095,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,4095,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,4095,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,8191,0.059061333537101746
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,8191,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,8191,0.05121066669623057
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,8191,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,8191,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,8191,0.04608533283074697
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,8191,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,8191,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,8191,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,8191,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,8191,0.04747733473777771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,8191,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,8191,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,8191,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,16383,0.08365333080291748
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,16383,0.09763733545939128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,16383,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,16383,0.07508799930413564
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,16383,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,16383,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,16383,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,16383,0.08362666765848796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,16383,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,16383,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,16383,0.07512533167997996
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,16383,0.0751093327999115
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,16383,0.07406400144100189
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,16383,0.07474133372306824
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,32767,0.13943466544151306
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,32767,0.1770026683807373
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,32767,0.13329066832860312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,32767,0.13104533155759177
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,32767,0.13005333145459494
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,32767,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,32767,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,32767,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,32767,0.17783466974894205
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,32767,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,32767,0.13172266880671182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,32767,0.1334773302078247
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,32767,0.13039466738700867
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,32767,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,65535,0.24473599592844644
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,65535,0.233130673567454
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,65535,0.3343626658121745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,65535,0.23040533065795898
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,65535,0.2317813237508138
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,65535,0.23487999041875204
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,65535,0.23313599824905396
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,65535,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,65535,0.23483733336130777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,65535,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,65535,0.33536001046498615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,65535,0.23073599735895792
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,65535,0.23278933763504028
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,65535,0.2307466665903727
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,1,131071,0.4507360061009725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,2,131071,0.6490453481674194
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,4,131071,0.4278666575749715
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,32,131071,0.4261600176493327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,8,131071,0.4268266757329305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,16,1,64,131071,0.42683732509613037
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,1,131071,0.45209598541259766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,2,131071,0.6493866840998331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,8,131071,0.4278666575749715
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,16,131071,0.42550933361053467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,4,131071,0.429909348487854
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,16,131071,0.42922667662302655
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,32,131071,0.4251306851704915
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,16,1,64,131071,0.4264959891637166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,1,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,1,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,1,0.029834667841593426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,1,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,1,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,1,0.012752000242471695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,1,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,1,0.012842666357755661
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,1,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,3,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,3,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,3,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,3,0.012831999609867731
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,3,0.027877333263556164
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,3,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,3,0.013466666142145792
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,3,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,3,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,3,0.012666666259368261
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,3,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,3,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,3,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,7,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,7,0.013888000200192133
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,7,0.012938667088747025
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,7,0.01471466695268949
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,7,0.016906666258970898
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,7,0.019567999988794327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,7,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,7,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,7,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,15,0.014896000425020853
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,15,0.012949333836634954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,15,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,15,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,15,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,15,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,15,0.012821332861979803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,15,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,15,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,15,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,31,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,31,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,31,0.012831999609867731
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,31,0.012831999609867731
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,31,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,31,0.014645333091417948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,31,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,63,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,63,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,63,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,63,0.012800000607967377
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,63,0.014090667168299357
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,63,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,63,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,63,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,63,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,127,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,127,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,127,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,127,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,127,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,127,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,127,0.017792000124851864
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,127,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,127,0.014570667097965876
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,127,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,127,0.012944000462690989
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,127,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,255,0.015354666858911514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,255,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,255,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,255,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,255,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,255,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,255,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,255,0.013221333424250284
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,255,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,255,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,255,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,255,0.013301332791646322
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,511,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,511,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,511,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,511,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,511,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,511,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,511,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,511,0.014335999886194864
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,1023,0.030378667016824085
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,1023,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,1023,0.015706667055686314
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,1023,0.019237333287795384
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,1023,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,1023,0.023200000325838726
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,1023,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,1023,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,1023,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,1023,0.0164533331990242
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,1023,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,2047,0.035818666219711304
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,2047,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,2047,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,2047,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,2047,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,2047,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,2047,0.03311999887228012
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,2047,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,2047,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,2047,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,2047,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,2047,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,4095,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,2047,0.0354666660229365
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,4095,0.05939733485380808
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,4095,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,4095,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,4095,0.045050665736198425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,4095,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,4095,0.04435733457406362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,4095,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,4095,0.04710933566093445
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,4095,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,4095,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,4095,0.04370133578777313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,4095,0.04506133496761322
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,4095,0.04470400015513102
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,8191,0.08602133393287659
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,8191,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,8191,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,8191,0.07032000025113423
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,8191,0.07065600156784058
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,8191,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,8191,0.0867039958635966
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,8191,0.0993226667245229
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,8191,0.07201600074768066
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,8191,0.0744053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,8191,0.07030933101971944
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,8191,0.07099199791749318
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,8191,0.0703306645154953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,8191,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,16383,0.1397599975268046
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,16383,0.17698132991790771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,16383,0.12425067027409871
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,16383,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,16383,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,16383,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,16383,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,16383,0.17783466974894205
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,16383,0.12424533565839131
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,16383,0.13994666934013367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,16383,0.12664000193277994
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,16383,0.12288533647855122
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,16383,0.12424533565839131
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,16383,0.12392000357309978
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,32767,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,32767,0.33536001046498615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,32767,0.22800532976786295
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,32767,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,32767,0.22698666652043661
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,32767,0.2259626587231954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,32767,0.33399999141693115
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,32767,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,32767,0.22665599981943765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,32767,0.2314186692237854
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,32767,0.22595733404159546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,32767,0.22494399547576904
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,32767,0.22699199120203653
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,32767,0.22766399383544922
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,65535,0.4657546679178874
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,65535,0.6490453481674194
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,65535,0.4442453384399414
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,65535,0.43981866041819256
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,65535,0.4398080110549927
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,65535,0.43774934609731037
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,65535,0.4367306629816691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,65535,0.4643893241882324
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,65535,0.4428693453470866
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,65535,0.6483626763025919
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,65535,0.43740801016489667
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,65535,0.4370773235956828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,65535,0.43742398420969647
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,65535,0.4367306629816691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,1,131071,0.8772213459014893
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,2,131071,1.2747039794921875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,4,131071,0.8485546906789144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,8,131071,0.8388266563415527
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,16,131071,0.8408746719360352
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,32,131071,0.837125301361084
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,32,1,64,131071,0.8401973247528076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,2,131071,1.2743679682413738
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,4,131071,0.8468373616536459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,1,131071,0.8751786549886068
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,8,131071,0.8395093282063802
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,16,131071,0.8391679922739664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,32,131071,0.839850664138794
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,32,1,64,131071,0.8391733169555664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,1,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,1,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,1,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,1,0.015397333850463232
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,1,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,1,0.014293332894643148
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,3,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,3,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,7,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,3,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,3,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,3,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,7,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,7,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,7,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,7,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,7,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,7,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,7,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,7,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,7,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,7,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,7,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,15,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,15,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,15,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,15,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,15,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,15,0.014565333724021912
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,15,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,31,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,31,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,31,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,31,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,31,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,31,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,31,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,31,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,31,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,31,0.014538666854302088
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,63,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,63,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,63,0.03313066562016805
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,63,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,63,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,63,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,63,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,63,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,127,0.01471466695268949
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,127,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,127,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,127,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,127,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,127,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,127,0.012890666723251343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,127,0.012762666990359625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,255,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,255,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,255,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,255,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,255,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,255,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,255,0.014869333555301031
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,255,0.01351999988158544
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,511,0.01703466723362605
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,511,0.03480000048875809
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,511,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,511,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,511,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,511,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,511,0.015381333728631338
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,511,0.02250666668017705
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,511,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,511,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,511,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,511,0.015082667271296183
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,511,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,1023,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,1023,0.03551466763019562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,1023,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,1023,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,1023,0.03244800120592117
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,1023,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,1023,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,1023,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,1023,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,1023,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,1023,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,2047,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,2047,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,2047,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,2047,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,2047,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,2047,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,2047,0.04438933233420054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,2047,0.06314133107662201
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,2047,0.04814933240413666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,2047,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,2047,0.04404266675313314
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,2047,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,2047,0.046069333950678505
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,2047,0.04506133496761322
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,4095,0.07202666501204173
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,4095,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,4095,0.09113599856694539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,4095,0.10275200009346008
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,4095,0.0726986676454544
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,4095,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,4095,0.07202666501204173
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,4095,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,4095,0.10379200180371602
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,4095,0.07541866600513458
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,4095,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,4095,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,4095,0.07235733171304067
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,8191,0.1307360033194224
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,8191,0.144896000623703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,8191,0.1867093245188395
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,8191,0.12458133697509766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,4095,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,8191,0.12422399719556172
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,8191,0.14421332875887552
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,8191,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,8191,0.12492799758911133
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,8191,0.12458667159080505
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,8191,0.18620266517003378
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,8191,0.13141866525014242
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,8191,0.1239306628704071
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,8191,0.12492799758911133
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,16383,0.23244800170262656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,16383,0.24848532676696777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,8191,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,16383,0.3537919918696086
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,16383,0.22665067513783774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,16383,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,16383,0.22664533058802286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,16383,0.22766933838526407
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,16383,0.24848000208536783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,16383,0.35447998841603595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,16383,0.22631466388702393
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,16383,0.22766933838526407
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,16383,0.22698666652043661
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,16383,0.227674663066864
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,16383,0.23314666748046875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,32767,0.6912000179290771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,32767,0.4715520143508911
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,32767,0.44390400250752765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,32767,0.4394773244857788
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,32767,0.43880001703898114
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,32767,0.4391466776529948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,32767,0.439791997273763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,32767,0.47121067841847736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,32767,0.6905173460642496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,32767,0.44390400250752765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,32767,0.438101331392924
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,32767,0.43980268637339276
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,32767,0.4374133348464966
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,32767,0.4391253391901652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,65535,0.8816640377044678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,65535,0.8458133538564047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,65535,1.3554293314615886
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,65535,0.8381439844767252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,65535,0.8401866753896078
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,65535,0.8395040035247803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,65535,0.8366080125172933
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,65535,0.8847359816233317
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,65535,1.360223929087321
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,65535,0.8471946716308594
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,65535,0.8391573429107666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,65535,0.8364266554514567
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,65535,0.838485320409139
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,65535,0.8367786407470703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,2,131071,2.6821972529093423
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,4,131071,1.6587146123250325
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,1,131071,1.7153867085774739
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,8,131071,1.6447146733601887
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,64,1,64,131071,1.6436959902445476
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,32,131071,1.6498346328735352
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,16,131071,1.6450506846110027
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,1,131071,1.7126399676005046
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,2,131071,2.6781012217203775
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,8,131071,1.6505173047383626
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,16,131071,1.645418643951416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,4,131071,1.662805398305257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,1,0.017077332983414333
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,32,131071,1.6453973452250164
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,1,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,64,1,64,131071,1.6474453608194988
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,1,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,1,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,1,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,3,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,3,0.015029333531856537
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,3,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,3,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,3,0.015050667027632395
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,3,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,3,0.01600533351302147
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,3,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,7,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,7,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,7,0.015082667271296183
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,7,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,7,0.015370666980743408
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,7,0.022890667120615642
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,7,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,7,0.01505600040157636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,15,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,15,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,15,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,15,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,15,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,15,0.020992000897725422
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,15,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,31,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,15,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,31,0.016778666526079178
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,31,0.014709333578745524
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,31,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,31,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,31,0.01708799973130226
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,31,0.01498666654030482
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,63,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,63,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,31,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,63,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,63,0.015029333531856537
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,63,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,63,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,63,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,63,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,63,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,63,0.023290666441122692
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,63,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,127,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,127,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,127,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,127,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,127,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,127,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,127,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,127,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,127,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,255,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,255,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,255,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,255,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,255,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,255,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,255,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,255,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,255,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,255,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,255,0.014933332800865173
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,255,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,255,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,511,0.022511998812357586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,511,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,511,0.02013333390156428
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,511,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,511,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,511,0.019461333751678467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,511,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,511,0.038575999438762665
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,511,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,511,0.019461333751678467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,511,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,1023,0.04948266843954722
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,1023,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,1023,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,1023,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,1023,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,1023,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,1023,0.045040001471837364
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,1023,0.048783997694651283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,1023,0.06453866759936015
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,1023,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,1023,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,1023,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,1023,0.04574933151404063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,2047,0.0784853349129359
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,2047,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,2047,0.07884266475836436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,2047,0.10546666383743286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,1023,0.04506133496761322
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,2047,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,2047,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,2047,0.07542933523654938
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,2047,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,2047,0.0778186668952306
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,2047,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,2047,0.0747573326031367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,2047,0.07474133372306824
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,2047,0.07474666833877563
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,2047,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,4095,0.19129067659378052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,4095,0.1341600020726522
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,4095,0.13499200344085693
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,4095,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,4095,0.13140799601872763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,4095,0.13159466783205667
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,4095,0.18943466742833456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,4095,0.13397333025932312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,4095,0.130730668703715
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,4095,0.13158933321634927
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,4095,0.13192533453305563
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,4095,0.13448533415794373
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,4095,0.13141333063443503
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,4095,0.13209066788355509
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,8191,0.24030399322509766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,8191,0.3575466473897298
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,8191,0.24303466081619263
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,8191,0.23688532908757529
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,8191,0.24098666508992514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,8191,0.2392746607462565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,8191,0.2402986685434977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,8191,0.23959465821584067
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,8191,0.3572053511937459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,8191,0.24234666426976523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,8191,0.237226665019989
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,8191,0.237226665019989
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,8191,0.2382240096728007
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,8191,0.2379146615664164
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,16383,0.4606240193049113
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,16383,0.4599413474400838
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,16383,0.4575573205947876
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,16383,0.6942880153656006
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,16383,0.46745598316192627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,16383,0.46067198117574054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,16383,0.463701327641805
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,16383,0.460970679918925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,16383,0.46165335178375244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,16383,0.46301865577697754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,16383,0.6949866612752279
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,16383,0.4585813283920288
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,16383,0.4602880080540975
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,16383,0.46029333273569745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,32767,0.8837119738260905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,32767,1.4001493453979492
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,32767,0.8857599894205729
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,32767,0.8830347061157227
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,32767,0.8823466300964355
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,32767,0.8833706378936768
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,32767,0.8857599894205729
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,32767,0.8850826422373453
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,32767,0.8874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,32767,1.4066346486409504
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,32767,0.882693370183309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,32767,0.8816693623860677
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,32767,0.8830293019612631
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,32767,0.8854186534881592
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,65535,1.7365546226501465
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,65535,2.781866709391276
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,65535,1.7290293375651042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,65535,1.7320960362752278
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,65535,1.7399519284566243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,65535,1.733125368754069
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,65535,1.7402879397074382
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,65535,1.7327839533487956
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,65535,1.7351679801940918
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,65535,2.8190720876057944
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,65535,1.7337973912556965
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,65535,1.7303892771402996
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,65535,1.7355039914449055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,65535,1.7344853083292644
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,1,131071,3.4379094441731772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,2,131071,5.686960220336914
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,4,131071,3.4467573165893555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,8,131071,3.4508320490519204
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,16,131071,3.4526185989379883
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,32,131071,3.4515679677327475
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,128,1,64,131071,3.4525814056396484
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,1,131071,3.4416640599568686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,1,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,1,0.0365226666132609
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,1,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,1,0.022181332111358643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,8,131071,3.438255945841471
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,1,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,2,131071,5.678768157958984
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,16,131071,3.452927907307943
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,4,131071,3.4457600911458335
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,1,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,1,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,32,131071,3.4481547673543296
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,1,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,1,0.021173333128293354
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,1,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,1,0.021498667697111767
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,128,1,64,131071,3.4488372802734375
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,1,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,3,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,3,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,3,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,3,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,3,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,3,0.024901332954565685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,3,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,3,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,3,0.02182399978240331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,3,0.021840001145998638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,3,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,3,0.02117866774400075
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,7,0.023200000325838726
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,7,0.0351946676770846
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,7,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,7,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,7,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,7,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,7,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,7,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,7,0.02458133300145467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,7,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,7,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,7,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,7,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,7,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,15,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,15,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,15,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,15,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,15,0.021850667893886566
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,15,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,15,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,15,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,15,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,15,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,15,0.035242666800816856
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,15,0.021903999149799347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,15,0.021840001145998638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,31,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,31,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,31,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,31,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,31,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,31,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,31,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,31,0.020794666061798733
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,63,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,63,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,63,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,63,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,63,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,63,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,127,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,63,0.021498667697111767
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,63,0.021253332495689392
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,63,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,127,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,127,0.022890667120615642
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,63,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,127,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,127,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,127,0.020842666427294414
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,127,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,127,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,127,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,127,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,127,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,127,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,127,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,127,0.021850667893886566
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,255,0.03992533435424169
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,255,0.023237332701683044
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,255,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,255,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,255,0.03276266654332479
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,255,0.022181332111358643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,255,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,255,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,255,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,255,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,255,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,255,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,255,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,511,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,511,0.05392533540725708
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,511,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,511,0.046767999728520714
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,255,0.021183999876181286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,511,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,511,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,511,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,511,0.0689386675755183
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,511,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,511,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,511,0.05119466781616211
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,511,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,511,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,511,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,1023,0.08329066634178162
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,1023,0.1109386682510376
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,1023,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,1023,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,1023,0.07578133543332417
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,1023,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,1023,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,1023,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,1023,0.07646933197975159
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,1023,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,1023,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,1023,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,1023,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,1023,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,2047,0.13993600010871887
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,2047,0.19473065932591757
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,2047,0.12972266475359598
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,2047,0.13587733109792074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,2047,0.1307253340880076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,2047,0.13738666971524557
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,2047,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,2047,0.1307199994723002
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,2047,0.13192533453305563
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,2047,0.13346667091051737
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,2047,0.1938719948132833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,2047,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,2047,0.12969600160916647
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,2047,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,4095,0.36232535044352215
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,4095,0.2385866641998291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,4095,0.246778666973114
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,4095,0.23817066351572672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,4095,0.24438933531443277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,4095,0.23825067281723022
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,4095,0.23892800013224283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,4095,0.24167466163635254
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,4095,0.36130134264628094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,4095,0.24268800020217896
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,4095,0.23756267627080283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,4095,0.24541866779327393
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,4095,0.23689067363739014
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,4095,0.23825067281723022
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,8191,0.46642665068308514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,8191,0.6922346750895182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,8191,0.46234134833017987
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,8191,0.4551786581675212
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,8191,0.45653335253397626
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,8191,0.4664320151011149
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,8191,0.45482667287190753
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,8191,0.45585068066914874
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,8191,0.6908586819966634
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,8191,0.46299731731414795
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,8191,0.45551466941833496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,8191,0.4561920166015625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,8191,0.4561973412831624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,8191,0.45413867632548016
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,16383,0.904538631439209
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,16383,0.9137492974599203
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,16383,1.3704586029052734
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,16383,0.9007786909739176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,16383,0.9000960191090902
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,16383,0.8994080225626627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,16383,0.8994293212890625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,16383,0.9140533606211344
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,16383,1.3765974044799805
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,16383,0.9028266270955404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,16383,0.9007786909739176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,16383,0.9031786918640137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,16383,0.9024693171183268
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,16383,0.9007786909739176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,32767,2.8299999237060547
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,32767,1.7730560302734375
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,32767,1.7614506085713704
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,32767,1.7556479771931965
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,32767,1.7573599815368652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,32767,1.7634986241658528
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,32767,1.7549653053283691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,32767,1.774079958597819
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,32767,2.8255573908487954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,32767,1.7658880551656086
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,32767,1.7580374081929524
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,32767,1.7604319254557292
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,32767,1.7546240488688152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,32767,1.7570133209228516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,1,65535,3.4996906916300454
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,2,65535,5.776896158854167
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,16,65535,3.476815859476725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,32,65535,3.489450772603353
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,256,1,64,65535,3.4867254892985025
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,4,65535,3.475802739461263
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,8,65535,3.483306566874186
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,1,65535,3.502762794494629
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,1,0.03959999978542328
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,1,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,1,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,1,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,1,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,1,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,1,0.03278400003910065
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,2,65535,5.749077479044597
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,1,0.061093335350354515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,1,0.039274667700131737
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,1,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,1,0.03550933301448822
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,1,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,1,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,1,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,4,65535,3.4825973510742188
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,3,0.03583466758330663
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,3,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,3,0.03857066730658213
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,8,65535,3.4761387507120767
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,16,65535,3.50276788075765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,3,0.03173333406448364
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,32,65535,3.4914986292521157
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,3,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,3,0.031386665999889374
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,256,1,64,65535,3.4826186498006186
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,3,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,3,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,3,0.038218667109807335
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,3,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,3,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,3,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,3,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,7,0.03824000060558319
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,3,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,7,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,7,0.061103999614715576
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,7,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,7,0.03173866619666418
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,7,0.03309866786003113
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,7,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,7,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,7,0.0354720006386439
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,7,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,7,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,7,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,7,0.060080001751581825
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,7,0.03173866619666418
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,15,0.060421332716941833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,15,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,15,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,15,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,15,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,15,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,15,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,15,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,15,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,15,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,15,0.06076266864935557
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,15,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,15,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,31,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,31,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,31,0.03719466676314672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,31,0.06005866825580597
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,31,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,31,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,31,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,31,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,31,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,31,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,31,0.03513066718975703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,31,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,31,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,31,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,63,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,63,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,63,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,63,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,63,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,63,0.03139200061559677
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,63,0.03928533444801966
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,63,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,63,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,63,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,63,0.03309866786003113
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,63,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,63,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,63,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,127,0.04470933477083842
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,127,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,127,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,127,0.037605332831541695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,127,0.032416000962257385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,127,0.03311999887228012
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,127,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,127,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,127,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,127,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,127,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,127,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,127,0.03922666609287262
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,127,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,255,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,255,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,255,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,255,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,255,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,255,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,255,0.0692853331565857
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,255,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,255,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,255,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,255,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,255,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,255,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,255,0.04576000074545542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,511,0.11229866743087769
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,511,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,511,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,511,0.0904373327891032
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,511,0.0795360008875529
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,511,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,511,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,511,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,511,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,511,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,511,0.07854933540026347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,511,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,511,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,511,0.0778186668952306
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,1023,0.1474560002485911
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,1023,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,1023,0.18756266434987387
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,1023,0.13567466537157694
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,1023,0.13619200388590494
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,1023,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,1023,0.13448533415794373
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,1023,0.14594133694966635
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,1023,0.18789867560068765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,1023,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,1023,0.1367039978504181
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,1023,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,1023,0.13550933202107748
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,1023,0.13636799653371176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,2047,0.2563413381576538
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,2047,0.24336532751719156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,2047,0.24985599517822266
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,2047,0.24438933531443277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,2047,0.3380853335062663
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,2047,0.2392746607462565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,2047,0.24064532915751138
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,2047,0.34013867378234863
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,2047,0.25600532690684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,2047,0.25019200642903644
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,2047,0.2437173326810201
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,2047,0.23859200874964395
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,2047,0.244053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,2047,0.2416586677233378
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,4095,0.637440005938212
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,4095,0.4695093234380086
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,4095,0.47563199202219647
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,4095,0.4602880080540975
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,4095,0.45960533618927
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,4095,0.459935983022054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,4095,0.46164798736572266
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,4095,0.47701334953308105
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,4095,0.6377813418706259
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,4095,0.4695039987564087
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,4095,0.4613120158513387
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,4095,0.45960533618927
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,4095,0.4602880080540975
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,4095,0.45823466777801514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,8191,0.9244960149129232
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,8191,1.2298239866892497
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,8191,0.9147679805755615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,8191,0.904538631439209
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,8191,0.9048746426900228
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,8191,0.9069226582845052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,8191,0.9011147022247314
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,8191,0.9251626332600912
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,8191,1.230847994486491
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,8191,0.9041866461435953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,8191,0.9127253691355387
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,8191,0.9000960191090902
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,8191,0.9052159786224365
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,8191,0.9062346617380778
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,16383,1.8394452730814617
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,16383,2.512213389078776
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,16383,1.8249386151631672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,16383,1.8164052963256836
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,16383,1.8102614084879558
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,16383,1.808890660603841
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,16383,1.8088960647583008
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,16383,1.8408266703287761
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,16383,1.817087968190511
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,16383,2.507429281870524
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,16383,1.8133333524068196
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,16383,1.8136746088663738
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,16383,1.8082133928934734
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,16383,1.810266653696696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,1,32767,3.5805867513020835
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,2,32767,5.2640425364176435
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,4,32767,3.5585705439249673
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,8,32767,3.5438880920410156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,16,32767,3.545626640319824
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,32,32767,3.5503787994384766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,512,1,64,32767,3.547312100728353
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,1,32767,3.5891199111938477
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,1,0.11776000261306763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,1,0.06384533147017162
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,1,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,1,0.05324266850948334
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,1,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,1,0.05151999990145365
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,1,0.05256533126036326
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,1,0.06381333371003468
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,1,0.1167093316713969
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,1,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,1,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,2,32767,5.262165387471517
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,4,32767,3.557546615600586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,1,0.0532533327738444
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,1,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,1,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,3,0.11573867003122966
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,3,0.06585066517194112
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,3,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,3,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,3,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,8,32767,3.5480000178019204
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,3,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,3,0.0529013325770696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,16,32767,3.540992101033529
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,3,0.11674132943153381
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,3,0.0631520003080368
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,3,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,3,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,3,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,3,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,3,0.05188799897829691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,7,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,7,0.06758933266003926
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,7,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,32,32767,3.5425281524658203
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,7,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,7,0.06278933087984721
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,7,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,7,0.05292266607284546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,7,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,7,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,7,0.05389333268006643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,7,0.1160533328851064
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,7,0.051872000098228455
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,512,1,64,32767,3.550719896952311
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,7,0.06555200119813283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,7,0.05292266607284546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,15,0.05292266607284546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,15,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,15,0.06692799925804138
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,15,0.1160533328851064
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,15,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,15,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,15,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,15,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,15,0.06758933266003926
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,15,0.052570665876070656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,15,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,15,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,15,0.05187733471393585
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,15,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,31,0.0634933312733968
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,31,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,31,0.11742400129636128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,31,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,31,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,31,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,31,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,31,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,31,0.054618666569391884
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,31,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,31,0.11673600474993388
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,31,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,31,0.05221866567929586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,31,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,63,0.11570666233698527
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,63,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,63,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,63,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,63,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,63,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,63,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,63,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,63,0.05324266850948334
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,63,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,63,0.06723733246326447
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,63,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,63,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,63,0.05189866820971171
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,127,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,127,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,127,0.1167680025100708
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,127,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,127,0.06145599981149038
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,127,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,127,0.07235733171304067
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,127,0.0699786643187205
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,127,0.11673600474993388
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,127,0.07202666501204173
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,127,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,127,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,127,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,127,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,255,0.10171199838320415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,255,0.12321600317955017
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,255,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,255,0.08499733606974284
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,255,0.0740479975938797
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,255,0.07411199808120728
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,255,0.07168533404668172
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,255,0.09657599528630574
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,255,0.0825973351796468
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,255,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,255,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,255,0.12388799587885539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,255,0.07236800094445546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,255,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,511,0.20906666914621988
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,511,0.15172800421714783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,511,0.13329600294431052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,511,0.1437013347943624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,511,0.13431466619173685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,511,0.13397333025932312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,511,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,511,0.20821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,511,0.15308800339698792
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,511,0.14404267072677612
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,511,0.13363200426101685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,511,0.13223999738693237
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,511,0.1327786644299825
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,511,0.13363200426101685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,1023,0.2491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,1023,0.35787733395894367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,1023,0.25462933381398517
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,1023,0.2300693392753601
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,1023,0.23177067438761392
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,1023,0.2355146606763204
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,1023,0.2317919929822286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,1023,0.2566933234532674
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,1023,0.3572053511937459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,1023,0.23519466320673624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,1023,0.24643733104070029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,1023,0.22937599817911783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,1023,0.2310826579729716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,1023,0.23213332891464233
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,2047,0.45894932746887207
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,2047,0.6577706734339396
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,2047,0.4387893279393514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,2047,0.4357173442840576
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,2047,0.43401066462198895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,2047,0.4486773411432902
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,2047,0.43400001525878906
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,2047,0.4582506815592448
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,2047,0.6555306514104208
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,2047,0.44461333751678467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,2047,0.437391996383667
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,2047,0.4336640040079753
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,2047,0.434005339940389
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,2047,0.43263999621073407
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,4095,0.874842643737793
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,4095,1.2506453196207683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,4095,0.851973295211792
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,4095,0.859813372294108
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,4095,0.8471893469492594
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,4095,0.8471893469492594
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,4095,0.8447999954223633
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,4095,0.8727946281433105
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,4095,0.8656160036722819
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,4095,1.2485919793446858
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,4095,0.8506027062733968
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,4095,0.8485546906789144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,4095,0.8468480110168457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,4095,0.8468426863352457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,8191,1.7099199295043945
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,8191,2.43831459681193
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,8191,1.6868693033854167
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,8191,1.7064959208170574
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,8191,1.6971093813578289
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,8191,1.6764639218648274
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,8191,1.688576062520345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,8191,1.7119626998901367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,8191,2.431488037109375
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,8191,1.6929866472880046
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,8191,1.6954239209493
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,8191,1.6878933906555176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,8191,1.6820906003316243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,8191,1.6768053372701008
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,1,16383,3.4259840647379556
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,2,16383,4.985343933105469
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,4,16383,3.4102614720662436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,8,16383,3.39029852549235
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,16,16383,3.399168014526367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,32,16383,3.38482666015625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,1,1024,1,64,16383,3.3868799209594727
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,1,16383,3.4063361485799155
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,1,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,1,0.011621333658695221
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,1,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,3,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,3,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,7,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,7,0.012661332885424295
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,7,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,2,16383,5.029888153076172
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,15,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,15,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,31,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,15,0.010618666807810465
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,4,16383,3.4095840454101562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,31,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,31,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,31,0.011578666667143503
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,31,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,8,16383,3.40394655863444
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,31,0.012602667013804117
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,31,0.011920000116030375
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,31,0.021397332350413006
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,63,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,16,16383,3.3923467000325522
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,63,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,63,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,63,0.012815999488035837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,63,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,63,0.017514667163292568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,32,16383,3.396426518758138
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,127,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,1,1024,1,64,16383,3.376986821492513
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,127,0.011301333705584208
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,127,0.012847999731699625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,127,0.012597333639860153
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,127,0.012666666259368261
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,127,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,255,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,255,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,255,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,255,0.012597333639860153
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,255,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,511,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,255,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,511,0.014778666198253632
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,511,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,511,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,511,0.014767999450365702
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,511,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,511,0.015333333363135656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,511,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,511,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,511,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,1023,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,1023,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,1023,0.015061333775520325
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,1023,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,1023,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,1023,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,1023,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,2047,0.017114666601022083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,2047,0.015354666858911514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,2047,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,2047,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,2047,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,2047,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,2047,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,4095,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,2047,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,4095,0.022842665513356526
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,4095,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,4095,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,4095,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,4095,0.018415999909241993
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,4095,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,8191,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,8191,0.020853333175182343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,8191,0.02902399996916453
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,16383,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,16383,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,16383,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,16383,0.02628266563018163
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,16383,0.02661866694688797
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,16383,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,16383,0.027189334233601887
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,16383,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,16383,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,16383,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,16383,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,16383,0.0262773334980011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,32767,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,32767,0.03513066718975703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,32767,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,32767,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,32767,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,32767,0.03376533339420954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,32767,0.03959999978542328
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,32767,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,32767,0.033786666889985405
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,32767,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,32767,0.03550933301448822
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,32767,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,65535,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,65535,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,65535,0.04537599782148997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,65535,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,65535,0.04301333427429199
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,65535,0.043322667479515076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,65535,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,65535,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,65535,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,65535,0.05051200091838837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,65535,0.04164800047874451
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,65535,0.043023998538653054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,65535,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,65535,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1,1,1,131071,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1,1,4,131071,0.07918400069077809
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1,1,2,131071,0.06795200208822887
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1,1,16,131071,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1,1,8,131071,0.06520000100135803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1,1,32,131071,0.06144533554712931
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1,1,64,131071,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1,1,1,131071,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1,1,2,131071,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1,1,4,131071,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1,1,8,131071,0.06348266700903575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1,1,16,131071,0.061808000008265175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1,1,64,131071,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1,1,32,131071,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,1,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,1,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,1,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,1,0.010869332899649939
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,1,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,1,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,1,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,1,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,3,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,3,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,3,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,3,0.012485332787036896
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,3,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,3,0.011952000359694162
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,3,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,3,0.012725333372751871
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,7,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,7,0.012506666282812754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,7,0.012896000097195307
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,3,0.011621333658695221
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,7,0.01250133290886879
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,7,0.012543999900420507
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,7,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,7,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,7,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,15,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,15,0.011589333415031433
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,15,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,15,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,15,0.012586666891972223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,15,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,15,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,15,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,15,0.01098666712641716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,31,0.012543999900420507
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,31,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,31,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,31,0.01250133290886879
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,63,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,63,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,63,0.010890666395425797
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,63,0.012586666891972223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,63,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,63,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,63,0.01257066677014033
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,127,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,63,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,127,0.012586666891972223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,127,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,127,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,127,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,127,0.014453332871198654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,127,0.012597333639860153
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,127,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,127,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,255,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,255,0.012746666868527731
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,255,0.012661332885424295
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,255,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,255,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,255,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,511,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,511,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,511,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,511,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,511,0.012938667088747025
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,511,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,511,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,511,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,511,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,1023,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,1023,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,1023,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,1023,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,1023,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,1023,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,1023,0.01267733300725619
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,1023,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,2047,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,2047,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,2047,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,2047,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,2047,0.01714133347074191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,2047,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,2047,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,2047,0.012495999534924826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,4095,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,4095,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,4095,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,4095,0.018960000326236088
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,4095,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,4095,0.02698666602373123
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,4095,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,8191,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,8191,0.029050665597120922
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,8191,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,4095,0.017050666113694508
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,8191,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,8191,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,8191,0.02319466571013133
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,8191,0.021146667500336964
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,16383,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,16383,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,16383,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,16383,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,16383,0.044351999958356224
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,16383,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,16383,0.031045332551002502
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,16383,0.04472533365090688
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,16383,0.036858665446440377
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,16383,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,16383,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,16383,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,16383,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,32767,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,32767,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,32767,0.043338666359583534
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,32767,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,16383,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,32767,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,32767,0.05529066423575083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,32767,0.04337066908677419
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,32767,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,32767,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,32767,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,32767,0.05565866827964783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,65535,0.054586668809254967
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,65535,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,65535,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,65535,0.06588799754778545
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,32767,0.05356800059477488
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,32767,0.057349334160486855
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,65535,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,65535,0.07642666498819987
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,65535,0.05496533215045929
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,65535,0.06451733410358429
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,65535,0.0798773318529129
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,65535,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,65535,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,65535,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,65535,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,2,1,2,131071,0.09181867043177287
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,2,1,1,131071,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,65535,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,2,1,8,131071,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,2,1,32,131071,0.10616000493367513
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,2,1,1,131071,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,2,1,4,131071,0.10274666547775269
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,2,1,16,131071,0.10241066416104634
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,2,1,2,131071,0.09216533104578654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,2,1,64,131071,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,2,1,8,131071,0.0897706647713979
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,2,1,32,131071,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,2,1,16,131071,0.10447999835014343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,2,1,64,131071,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,1,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,2,1,4,131071,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,1,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,1,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,1,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,1,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,1,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,1,0.013376000026861826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,1,0.012538666526476542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,1,0.013455999394257864
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,3,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,3,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,3,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,3,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,3,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,3,0.011007999380429586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,3,0.011274666835864386
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,7,0.013999999811251959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,7,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,7,0.011952000359694162
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,7,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,15,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,15,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,15,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,15,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,31,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,31,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,31,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,31,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,31,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,31,0.013317332913478216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,31,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,31,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,31,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,63,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,31,0.01231466606259346
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,63,0.012293333808581034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,63,0.010949333508809408
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,63,0.012282667060693106
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,63,0.012847999731699625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,127,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,127,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,127,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,127,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,127,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,255,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,255,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,255,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,255,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,255,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,255,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,511,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,511,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,511,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,1023,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,511,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,1023,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,1023,0.014650666465361914
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,511,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,1023,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,1023,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,1023,0.012847999731699625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,1023,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,1023,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,2047,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,2047,0.016832000265518825
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,2047,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,2047,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,2047,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,2047,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,2047,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,4095,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,4095,0.020848001043001812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,4095,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,4095,0.025285333395004272
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,4095,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,4095,0.017082666357358296
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,4095,0.026954665780067444
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,4095,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,4095,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,4095,0.017082666357358296
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,8191,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,8191,0.029002666473388672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,8191,0.03418133407831192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,8191,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,8191,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,8191,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,8191,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,8191,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,8191,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,8191,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,8191,0.029333333174387615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,16383,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,8191,0.031093334158261616
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,16383,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,16383,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,16383,0.04026666780312856
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,16383,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,16383,0.03824000060558319
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,8191,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,16383,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,16383,0.035455999275048576
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,16383,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,16383,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,16383,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,16383,0.03312533348798752
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,16383,0.03923200070858002
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,32767,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,32767,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,32767,0.06585066517194112
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,32767,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,32767,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,32767,0.055973331133524575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,32767,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,32767,0.05632533133029938
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,32767,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,32767,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,32767,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,32767,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,32767,0.06247466802597046
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,65535,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,65535,0.0815413345893224
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,32767,0.061103999614715576
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,65535,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,65535,0.08807999889055888
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,65535,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,65535,0.08711999654769897
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,65535,0.08736532926559448
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,65535,0.08158400158087413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,65535,0.08257066706816356
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,65535,0.08533333738644917
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,65535,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,65535,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,65535,0.08738666772842407
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,65535,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,4,1,1,131071,0.13329066832860312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,4,1,2,131071,0.13500266273816428
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,4,1,4,131071,0.17305066188176474
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,4,1,32,131071,0.14045866330464682
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,4,1,16,131071,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,4,1,8,131071,0.13499733805656433
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,4,1,2,131071,0.1365333298842112
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,4,1,4,131071,0.17356266578038534
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,4,1,64,131071,0.13994666934013367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,4,1,1,131071,0.13328533371289572
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,4,1,8,131071,0.13346667091051737
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,4,1,16,131071,0.1365386644999186
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,1,0.013541333377361298
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,4,1,32,131071,0.1418239971001943
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,4,1,64,131071,0.14011733730634054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,1,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,1,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,1,0.012821332861979803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,1,0.010949333508809408
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,1,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,1,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,3,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,1,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,1,0.012159999459981918
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,3,0.012741333494583765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,3,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,3,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,3,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,3,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,3,0.012293333808581034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,7,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,3,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,7,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,7,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,7,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,7,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,7,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,7,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,7,0.01128000020980835
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,7,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,7,0.01181866725285848
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,15,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,15,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,15,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,15,0.014101333916187286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,15,0.013637332866589228
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,15,0.010992000500361124
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,31,0.013317332913478216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,31,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,31,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,31,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,31,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,31,0.013317332913478216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,31,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,63,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,63,0.012725333372751871
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,63,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,63,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,63,0.011578666667143503
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,63,0.013642666240533194
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,63,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,127,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,127,0.011610666910807291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,127,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,127,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,127,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,127,0.012565333396196365
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,127,0.012847999731699625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,127,0.013562666873137156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,127,0.012661332885424295
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,127,0.021477334201335907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,255,0.013056000073750814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,255,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,255,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,255,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,255,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,255,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,511,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,511,0.015509333461523056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,511,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,511,0.01267733300725619
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,511,0.015034666905800501
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,511,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,1023,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,511,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,1023,0.01571200042963028
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,1023,0.013210666676362356
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,1023,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,1023,0.013845333208640417
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,1023,0.022895999252796173
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,1023,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,2047,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,1023,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,1023,0.015429332852363586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,1023,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,2047,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,2047,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,2047,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,2047,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,2047,0.01708799973130226
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,2047,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,4095,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,4095,0.03209600100914637
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,4095,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,4095,0.019461333751678467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,4095,0.023200000325838726
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,4095,0.022848000129063923
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,4095,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,4095,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,4095,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,4095,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,8191,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,4095,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,8191,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,8191,0.03959999978542328
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,8191,0.03789333254098892
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,8191,0.031119999786218006
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,8191,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,8191,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,8191,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,8191,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,8191,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,16383,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,16383,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,16383,0.0631573349237442
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,16383,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,16383,0.0525439977645874
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,16383,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,16383,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,16383,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,16383,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,16383,0.0529120018084844
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,16383,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,16383,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,16383,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,16383,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,32767,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,32767,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,32767,0.08737599849700928
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,32767,0.10001599788665771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,32767,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,32767,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,32767,0.08363200227419536
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,32767,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,32767,0.08155733346939087
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,32767,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,32767,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,32767,0.08088533580303192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,32767,0.08158400158087413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,32767,0.07986666758855183
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,65535,0.1349066694577535
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,65535,0.13636266191800436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,65535,0.16980799039204916
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,65535,0.13243200381596884
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,65535,0.13156267007191977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,65535,0.13687466581662497
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,65535,0.13551466663678488
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,65535,0.13312000036239624
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,65535,0.13686399658521017
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,65535,0.17151999473571777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,65535,0.13482133547465006
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,65535,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,65535,0.1365386644999186
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,65535,0.13261333107948303
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,8,1,1,131071,0.23688532908757529
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,8,1,2,131071,0.23585599660873413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,8,1,4,131071,0.31249066193898517
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,8,1,8,131071,0.2300586700439453
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,8,1,16,131071,0.23176532983779907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,8,1,32,131071,0.23279466231664023
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,8,1,64,131071,0.2297226587931315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,8,1,4,131071,0.3128319978713989
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,8,1,2,131071,0.23859200874964395
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,8,1,1,131071,0.23813867568969727
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,8,1,8,131071,0.2310826579729716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,8,1,16,131071,0.2297226587931315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,8,1,32,131071,0.233130673567454
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,1,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,1,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,8,1,64,131071,0.231440007686615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,1,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,1,0.012837332983811697
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,1,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,1,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,1,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,1,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,3,0.019978666057189304
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,3,0.012805332740147909
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,3,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,3,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,3,0.014730667074521383
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,3,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,3,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,7,0.01637866720557213
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,7,0.013658666362365087
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,7,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,7,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,7,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,15,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,7,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,15,0.016373333831628162
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,15,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,15,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,15,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,15,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,15,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,15,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,31,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,31,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,31,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,31,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,31,0.012698666503032049
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,31,0.014335999886194864
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,63,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,63,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,63,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,63,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,63,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,63,0.013658666362365087
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,63,0.012847999731699625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,63,0.01157333329319954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,127,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,63,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,127,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,127,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,127,0.01303999995191892
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,127,0.016154666741689045
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,127,0.012826666235923767
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,127,0.013946666071812311
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,255,0.014709333578745524
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,127,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,127,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,255,0.012944000462690989
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,255,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,255,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,255,0.012847999731699625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,255,0.015696000307798386
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,255,0.014544000228246054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,255,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,255,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,511,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,511,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,255,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,511,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,511,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,511,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,511,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,511,0.012949333836634954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,1023,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,1023,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,1023,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,1023,0.022831998765468597
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,2047,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,2047,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,1023,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,2047,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,2047,0.01874133323629697
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,2047,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,2047,0.018757333358128864
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,2047,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,2047,0.031045332551002502
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,2047,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,2047,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,2047,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,4095,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,4095,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,2047,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,4095,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,4095,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,4095,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,4095,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,4095,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,4095,0.03583466758330663
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,4095,0.026629333694775898
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,4095,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,4095,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,4095,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,8191,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,8191,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,4095,0.03309866786003113
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,8191,0.05904533465703329
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,4095,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,8191,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,8191,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,8191,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,8191,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,8191,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,8191,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,8191,0.058362667759259544
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,8191,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,8191,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,16383,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,8191,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,8191,0.045754666129748024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,16383,0.0846560001373291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,16383,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,16383,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,16383,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,16383,0.0744053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,16383,0.07436266541481018
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,16383,0.08431466420491536
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,16383,0.0986346701780955
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,16383,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,16383,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,16383,0.07542933523654938
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,32767,0.14131200313568115
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,16383,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,16383,0.08667733271916707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,32767,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,32767,0.1764693260192871
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,32767,0.13141866525014242
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,32767,0.1269813378651937
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,32767,0.13037332892417908
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,32767,0.13005333145459494
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,32767,0.14114133516947427
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,32767,0.1783519983291626
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,32767,0.13378133376439413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,32767,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,32767,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,32767,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,32767,0.12938132882118225
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,65535,0.2525866627693176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,65535,0.23757867018381754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,65535,0.33673067887624103
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,65535,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,65535,0.23041599988937378
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,65535,0.2290346622467041
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,65535,0.2539520064989726
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,65535,0.23128533363342285
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,65535,0.3349866469701131
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,65535,0.2519306739171346
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,65535,0.23040000597635904
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,65535,0.23040000597635904
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,65535,0.22801599899927774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,65535,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,16,1,1,131071,0.4753119945526123
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,16,1,2,131071,0.4503893454869588
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,16,1,4,131071,0.65774933497111
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,16,1,8,131071,0.4278666575749715
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,16,1,32,131071,0.43093331654866535
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,16,1,64,131071,0.4275199969609578
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,16,1,16,131071,0.4271786610285441
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,16,1,4,131071,0.6541653474171957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,16,1,2,131071,0.4507360061009725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,16,1,1,131071,0.47564268112182617
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,16,1,8,131071,0.42958935101826984
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,16,1,16,131071,0.4264906644821167
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,16,1,32,131071,0.4292213519414266
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,16,1,64,131071,0.4265013138453166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,1,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,1,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,1,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,1,0.012885333349307379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,1,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,1,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,3,0.012736000120639801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,3,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,7,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,7,0.01492799942692121
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,7,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,7,0.013317332913478216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,7,0.014645333091417948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,7,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,7,0.012949333836634954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,7,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,15,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,7,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,15,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,7,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,15,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,15,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,15,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,15,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,15,0.014645333091417948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,15,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,15,0.012960000584522883
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,31,0.013317332913478216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,31,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,31,0.014576000471909841
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,31,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,31,0.013317332913478216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,31,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,63,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,63,0.01639466608564059
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,63,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,63,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,63,0.012586666891972223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,63,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,63,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,127,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,127,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,127,0.012661332885424295
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,127,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,127,0.014335999886194864
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,127,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,255,0.014010666559139887
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,255,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,255,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,255,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,255,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,255,0.014335999886194864
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,255,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,255,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,511,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,511,0.017632000148296356
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,511,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,511,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,511,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,511,0.02117866774400075
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,511,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,511,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,1023,0.03551466763019562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,1023,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,1023,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,1023,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,1023,0.030389333764712017
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,1023,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,1023,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,1023,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,1023,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,2047,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,2047,0.042677332957585655
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,2047,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,2047,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,2047,0.024549332757790882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,2047,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,2047,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,2047,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,2047,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,2047,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,4095,0.05970133344332377
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,4095,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,4095,0.06314133107662201
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,4095,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,4095,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,4095,0.04609066744645437
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,4095,0.0631520003080368
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,4095,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,4095,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,4095,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,4095,0.04367466767628988
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,4095,0.044693330923716225
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,4095,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,8191,0.0867199997107188
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,8191,0.09215999643007915
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,4095,0.047466665506362915
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,8191,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,8191,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,8191,0.07099199791749318
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,8191,0.07065066695213318
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,8191,0.09114133318265279
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,8191,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,8191,0.07168533404668172
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,8191,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,8191,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,8191,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,8191,0.09966400265693665
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,8191,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,16383,0.1462613344192505
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,16383,0.14011733730634054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,16383,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,16383,0.12492266297340393
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,16383,0.12389333049456279
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,16383,0.177839994430542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,16383,0.13876799742380777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,16383,0.17679999272028604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,16383,0.14642666776974997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,16383,0.1300159990787506
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,16383,0.12424533565839131
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,16383,0.12286933263142903
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,16383,0.12356799840927124
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,32767,0.25738133986790973
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,32767,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,32767,0.23176532983779907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,32767,0.33399466673533124
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,16383,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,32767,0.22562134265899658
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,32767,0.22835199038187662
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,32767,0.2573653260866801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,32767,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,32767,0.334330677986145
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,32767,0.24541866779327393
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,32767,0.23177067438761392
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,32767,0.22595733404159546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,32767,0.22596800327301025
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,32767,0.22869332631429037
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,65535,0.6483626763025919
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,65535,0.4804266691207886
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,65535,0.4432213306427002
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,65535,0.43263999621073407
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,65535,0.43740801016489667
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,65535,0.46062934398651123
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,65535,0.4357120196024577
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,65535,0.4681280056635539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,65535,0.650757352511088
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,65535,0.44253333409627277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,65535,0.43161598841349286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,65535,0.4360479911168416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,65535,0.4790613253911336
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,65535,0.4333226680755615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,32,1,1,131071,0.9241546789805094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,32,1,2,131071,0.8741546471913656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,32,1,4,131071,1.272320032119751
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,32,1,8,131071,0.8475306828816732
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,32,1,32,131071,0.8384906450907389
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,32,1,16,131071,0.8405173619588217
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,32,1,64,131071,0.8420693079630533
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,32,1,1,131071,0.922106663386027
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,32,1,2,131071,0.8782506783803304
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,32,1,4,131071,1.2757333119710286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,32,1,16,131071,0.8389973640441895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,32,1,8,131071,0.8454826672871908
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,32,1,32,131071,0.84224534034729
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,32,1,64,131071,0.8414026896158854
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,1,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,1,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,1,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,1,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,1,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,3,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,3,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,3,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,3,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,3,0.014709333578745524
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,3,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,3,0.014730667074521383
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,7,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,7,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,7,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,7,0.01434133326013883
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,15,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,15,0.014629332969586054
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,15,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,15,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,15,0.012954667210578918
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,15,0.014639999717473984
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,31,0.015370666980743408
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,31,0.015040000279744467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,31,0.013999999811251959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,31,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,31,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,31,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,63,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,63,0.014901333798964819
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,63,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,63,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,63,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,63,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,63,0.015370666980743408
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,63,0.013562666873137156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,127,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,127,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,127,0.01470400020480156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,127,0.015029333531856537
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,127,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,127,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,127,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,255,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,255,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,255,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,255,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,255,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,255,0.012725333372751871
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,255,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,511,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,511,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,511,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,511,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,511,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,511,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,511,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,511,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,511,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,511,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,511,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,1023,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,1023,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,1023,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,1023,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,1023,0.03516799956560135
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,1023,0.021189334491888683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,1023,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,1023,0.021141332884629566
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,1023,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,2047,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,2047,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,2047,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,2047,0.06381333371003468
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,2047,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,2047,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,2047,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,2047,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,2047,0.047456001242001854
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,2047,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,2047,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,2047,0.04537599782148997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,2047,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,2047,0.044026667873064675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,4095,0.07578133543332417
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,4095,0.09079999725023906
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,4095,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,4095,0.07236800094445546
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,4095,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,4095,0.07167466481526692
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,4095,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,4095,0.10342400272687276
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,4095,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,4095,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,4095,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,4095,0.0730506678422292
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,4095,0.07065600156784058
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,4095,0.07166933516661327
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,8191,0.13190399607022604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,8191,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,8191,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,8191,0.1225440005461375
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,8191,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,8191,0.12800000111262003
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,8191,0.14352533221244812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,8191,0.187226672967275
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,8191,0.13038399815559387
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,8191,0.1855199933052063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,8191,0.12868799765904745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,8191,0.12424533565839131
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,8191,0.12321066856384277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,16383,0.24234133958816528
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,8191,0.12425067027409871
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,16383,0.35549867153167725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,16383,0.22733332713445029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,16383,0.23174399137496948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,16383,0.22733867168426514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,16383,0.2508853276570638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,16383,0.22835199038187662
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,16383,0.24200532833735147
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,16383,0.35450132687886554
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,16383,0.22698666652043661
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,16383,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,16383,0.2515679995218913
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,16383,0.22801067431767783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,32767,0.4626773198445638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,32767,0.4657493432362874
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,16383,0.22732800245285034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,32767,0.6877919832865397
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,32767,0.4466346502304077
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,32767,0.4391253391901652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,32767,0.43674135208129883
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,32767,0.4671093225479126
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,32767,0.46301865577697754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,32767,0.4367306629816691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,32767,0.44527467091878253
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,32767,0.43775999546051025
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,32767,0.6887946923573812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,32767,0.4357120196024577
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,32767,0.4360533157984416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,65535,0.9062399864196777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,65535,0.8813280264536539
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,65535,0.8485546906789144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,65535,0.837119976679484
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,65535,1.354416052500407
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,65535,0.8400213718414307
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,65535,0.8425760269165039
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,65535,0.9062506357828776
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,65535,0.8823626836140951
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,65535,1.3568000793457031
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,65535,0.8471840222676595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,65535,0.8367839654286703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,65535,0.8434240023295084
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,65535,0.8376320203145345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,64,1,2,131071,1.7143467267354329
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,64,1,1,131071,1.7907999356587727
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,64,1,8,131071,1.6679253578186035
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,64,1,16,131071,1.6440320014953613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,64,1,4,131071,2.6890185674031577
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,64,1,32,131071,1.6481332778930664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,64,1,64,131071,1.650858720143636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,1,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,64,1,2,131071,1.714687983194987
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,1,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,64,1,1,131071,1.7921759287516277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,64,1,16,131071,1.6488107045491536
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,64,1,8,131071,1.6682666142781575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,64,1,4,131071,2.6821972529093423
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,1,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,64,1,64,131071,1.6460800170898438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,1,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,1,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,64,1,32,131071,1.6508639653523762
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,1,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,1,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,1,0.017077332983414333
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,1,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,1,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,1,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,3,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,3,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,3,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,3,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,3,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,3,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,3,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,3,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,7,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,7,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,7,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,7,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,7,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,7,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,7,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,7,0.023189333577950794
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,7,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,7,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,15,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,15,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,15,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,15,0.020799999435742695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,15,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,15,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,15,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,31,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,31,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,31,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,31,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,31,0.020853333175182343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,31,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,31,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,31,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,31,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,31,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,63,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,31,0.014922666052977243
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,63,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,63,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,63,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,63,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,63,0.01505600040157636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,63,0.015685333559910457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,63,0.016783999900023144
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,127,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,127,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,127,0.015365333606799444
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,127,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,127,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,127,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,127,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,255,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,255,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,255,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,255,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,255,0.014725333700577417
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,255,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,255,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,255,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,255,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,255,0.024906667570273083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,255,0.022895999252796173
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,255,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,255,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,255,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,511,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,511,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,511,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,511,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,511,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,511,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,511,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,511,0.023226665953795116
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,511,0.036874666810035706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,511,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,511,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,1023,0.06520000100135803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,511,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,1023,0.05017066498597463
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,1023,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,1023,0.04914666712284088
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,1023,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,1023,0.05221866567929586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,1023,0.04437866806983948
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,1023,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,1023,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,1023,0.049173335234324135
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,1023,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,1023,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,1023,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,2047,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,2047,0.08090666433175404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,1023,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,2047,0.10614933570226033
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,2047,0.07407466570536296
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,2047,0.07680533329645793
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,2047,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,2047,0.07474666833877563
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,2047,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,2047,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,2047,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,2047,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,2047,0.07541866600513458
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,2047,0.07542400062084198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,2047,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,4095,0.13755733768145242
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,4095,0.1896053353945414
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,4095,0.13159466783205667
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,4095,0.13431466619173685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,4095,0.12732266386349997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,4095,0.13209600249926248
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,4095,0.1293706695238749
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,4095,0.13346133629480997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,4095,0.18978132804234824
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,4095,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,4095,0.13141333063443503
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,4095,0.13772799571355185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,4095,0.12663466731707254
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,4095,0.13346133629480997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,8191,0.24780799945195517
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,8191,0.3575413227081299
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,8191,0.2461013396581014
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,8191,0.23824000358581543
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,8191,0.24234666426976523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,8191,0.24097599585851034
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,8191,0.2409813404083252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,8191,0.24850134054819742
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,8191,0.24231467644373575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,8191,0.3575520118077596
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,8191,0.24678399165471396
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,8191,0.23825067281723022
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,8191,0.23995200792948404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,8191,0.23995733261108398
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,16383,0.4633599917093913
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,16383,0.698362668355306
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,16383,0.469157338142395
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,16383,0.46643733978271484
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,16383,0.45686932404836017
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,16383,0.4572319984436035
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,16383,0.46166400114695233
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,16383,0.4681386550267537
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,16383,0.463701327641805
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,16383,0.6987093289693197
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,16383,0.4592426617940267
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,16383,0.4561920166015625
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,16383,0.45789865652720135
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,16383,0.46164798736572266
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,32767,0.9103360176086426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,32767,0.8854186534881592
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,32767,0.8881440162658691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,32767,1.4004640579223633
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,32767,0.8857653141021729
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,32767,0.8816640377044678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,32767,0.8837119738260905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,32767,0.8809813658396403
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,32767,0.9103626410166422
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,32767,1.3980906804402669
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,32767,0.8836906750996908
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,32767,0.8891733487447103
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,32767,0.8861013253529867
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,32767,0.8871200084686279
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,65535,1.7969493865966797
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,65535,1.7389225959777832
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,65535,1.7331199645996094
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,65535,2.8276052474975586
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,65535,1.7402879397074382
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,65535,1.7351733843485515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,65535,1.7348267237345378
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,65535,1.7921706835428874
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,65535,1.7355413436889648
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,65535,2.813573201497396
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,65535,1.7361706097920735
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,65535,1.7341440518697102
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,65535,1.7416586875915527
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,65535,1.7344800631205242
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,128,1,2,131071,3.4402987162272134
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,128,1,1,131071,3.563349405924479
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,128,1,4,131071,5.643103917439778
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,128,1,8,131071,3.4498507181803384
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,128,1,16,131071,3.438591957092285
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,128,1,32,131071,3.4502293268839517
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,128,1,64,131071,3.4542773564656577
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,128,1,1,131071,3.5633440017700195
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,1,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,1,0.03583466758330663
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,128,1,2,131071,3.442351977030436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,1,0.02458133300145467
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,128,1,4,131071,5.664096196492513
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,1,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,128,1,8,131071,3.4467786153157554
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,1,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,1,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,128,1,16,131071,3.452591896057129
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,1,0.03242133309443792
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,1,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,128,1,32,131071,3.444053332010905
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,1,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,1,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,128,1,64,131071,3.452239990234375
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,3,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,3,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,3,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,3,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,3,0.021477334201335907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,3,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,3,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,3,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,3,0.021840001145998638
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,3,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,3,0.021669333179791767
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,3,0.025114665428797405
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,7,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,3,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,7,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,7,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,7,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,7,0.02181866765022278
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,7,0.021514666577180225
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,7,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,7,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,7,0.03173866619666418
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,7,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,7,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,7,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,7,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,7,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,15,0.024570666253566742
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,15,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,15,0.022837333381175995
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,15,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,15,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,15,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,15,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,15,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,15,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,15,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,15,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,31,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,31,0.023200000325838726
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,31,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,31,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,31,0.021514666577180225
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,31,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,31,0.02219199885924657
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,31,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,31,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,31,0.03207999964555105
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,31,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,31,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,63,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,63,0.023189333577950794
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,63,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,63,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,31,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,63,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,63,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,63,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,63,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,63,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,63,0.0365280012289683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,63,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,63,0.021498667697111767
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,63,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,127,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,127,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,127,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,127,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,127,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,127,0.021850667893886566
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,127,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,127,0.021850667893886566
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,127,0.023226665953795116
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,127,0.03448000053564707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,127,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,127,0.02250133454799652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,127,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,255,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,127,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,255,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,255,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,255,0.0460746685663859
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,255,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,255,0.023221333821614582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,255,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,255,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,255,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,255,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,255,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,255,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,255,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,511,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,511,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,511,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,511,0.0499839981396993
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,511,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,511,0.045408000548680626
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,511,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,511,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,511,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,511,0.04571733375390371
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,511,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,511,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,511,0.045754666129748024
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,511,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,1023,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,1023,0.1109279990196228
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,1023,0.08294400076071422
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,1023,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,1023,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,1023,0.07609599828720093
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,1023,0.07645333309968312
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,1023,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,1023,0.11161599556605022
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,1023,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,1023,0.07713599999745686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,1023,0.07679466903209686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,1023,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,1023,0.09078933795293172
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,2047,0.1389173368612925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,2047,0.14728533228238425
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,2047,0.19524266322453818
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,2047,0.12968533237775168
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,2047,0.13174933195114136
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,2047,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,2047,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,2047,0.13823466499646506
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,2047,0.12867200374603271
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,2047,0.13636266191800436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,2047,0.13267733653386435
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,2047,0.19644266366958618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,2047,0.14779200156529745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,2047,0.12902399897575378
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,4095,0.25737067063649494
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,4095,0.24780267477035522
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,4095,0.36300798257191974
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,4095,0.2461013396581014
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,4095,0.23961599667867026
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,4095,0.2392639915148417
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,4095,0.2573653260866801
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,4095,0.23893332481384277
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,4095,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,4095,0.24302399158477783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,4095,0.2392746607462565
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,4095,0.3619840145111084
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,4095,0.24132267634073892
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,4095,0.2379093368848165
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,8191,0.4780373175938924
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,8191,0.6922240257263184
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,8191,0.4647253354390462
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,8191,0.46609067916870117
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,8191,0.45721598466237384
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,8191,0.45653335253397626
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,8191,0.457205335299174
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,8191,0.47870934009552
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,8191,0.4660853147506714
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,8191,0.6915413538614908
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,8191,0.460970679918925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,8191,0.45687464872996014
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,8191,0.4578933318456014
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,8191,0.4613066514333089
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,16383,0.9069226582845052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,16383,0.9231359958648682
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,16383,0.9038506348927816
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,16383,0.9103306929270426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,16383,1.376255989074707
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,16383,0.8997546831766764
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,16383,0.9014613628387451
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,16383,0.9204053084055582
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,16383,0.9069226582845052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,16383,1.3680639266967773
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,16383,0.9038453102111816
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,16383,0.9106826782226562
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,16383,0.8973653316497803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,16383,0.9021493593851725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,32767,1.8085707028706868
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,32767,2.824533462524414
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,32767,1.7747626304626465
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,32767,1.7611093521118164
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,32767,1.7641812960306804
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,32767,1.7549653053283691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,32767,1.7573599815368652
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,32767,1.8092373212178547
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,32767,1.7785174051920574
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,32767,2.82862917582194
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,32767,1.762821356455485
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,32767,1.7587199211120605
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,32767,1.758384068806966
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,32767,1.762133280436198
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,256,1,1,65535,3.5785385767618814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,256,1,2,65535,3.501392046610514
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,256,1,8,65535,3.4816052118937173
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,256,1,4,65535,5.756074905395508
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,256,1,32,65535,3.491845448811849
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,256,1,64,65535,3.477504094441732
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,256,1,16,65535,3.482624053955078
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,256,1,1,65535,3.5829760233561196
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,1,0.05494399865468343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,1,0.038245332737763725
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,1,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,256,1,2,65535,3.4955787658691406
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,1,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,1,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,256,1,4,65535,5.791237513224284
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,1,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,1,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,1,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,1,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,1,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,1,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,1,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,1,0.033071999748547874
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,256,1,8,65535,3.492512067159017
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,256,1,16,65535,3.4757919311523438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,1,0.03143466760714849
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,3,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,3,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,256,1,32,65535,3.4863786697387695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,3,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,3,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,3,0.03141333411137263
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,3,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,3,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,256,1,64,65535,3.482624053955078
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,3,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,3,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,3,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,3,0.03141866624355316
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,3,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,3,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,3,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,7,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,7,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,7,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,7,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,7,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,7,0.05495999753475189
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,7,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,7,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,7,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,7,0.05494933327039083
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,7,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,7,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,7,0.03141333411137263
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,7,0.06149866680304209
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,15,0.06076266864935557
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,15,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,15,0.03751466671625773
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,15,0.031370667119820915
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,15,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,15,0.03928533444801966
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,15,0.05566933254400889
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,15,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,15,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,15,0.03756266583998998
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,15,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,15,0.03173866619666418
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,15,0.03276266654332479
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,15,0.05563200016816457
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,31,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,31,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,31,0.03956799954175949
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,31,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,31,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,31,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,31,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,31,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,31,0.03173866619666418
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,31,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,31,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,31,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,31,0.0317493329445521
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,31,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,63,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,63,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,63,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,63,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,63,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,63,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,63,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,63,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,63,0.03175999969244003
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,63,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,63,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,63,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,63,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,63,0.032773333291212715
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,127,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,127,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,127,0.03207999964555105
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,127,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,127,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,127,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,127,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,127,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,127,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,127,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,127,0.036890665690104164
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,127,0.032933334509531655
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,127,0.044719999035199486
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,127,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,255,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,255,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,255,0.0460746685663859
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,255,0.05051200091838837
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,255,0.06863466898600261
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,255,0.060773332913716636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,255,0.04572799801826477
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,255,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,255,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,255,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,255,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,255,0.07612266639868419
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,255,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,511,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,255,0.044719999035199486
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,511,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,511,0.08497599760691325
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,511,0.11264533797899882
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,511,0.08945066730181377
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,511,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,511,0.08051733175913493
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,511,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,511,0.08054933448632558
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,511,0.10102933645248413
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,511,0.08565866947174072
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,511,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,511,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,511,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,1023,0.15291200081507364
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,1023,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,1023,0.1873813271522522
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,1023,0.13705066839853922
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,1023,0.13396799564361572
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,1023,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,1023,0.13618666927019754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,1023,0.15308266878128052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,1023,0.1469439963499705
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,1023,0.18789867560068765
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,1023,0.14299199978510538
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,1023,0.13807466626167297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,1023,0.13570666313171387
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,1023,0.1358506679534912
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,2047,0.25497599442799884
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,2047,0.33741335074106854
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,2047,0.25190399090449017
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,2047,0.24337067206700644
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,2047,0.24510933955510458
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,2047,0.24166399240493774
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,2047,0.258735994497935
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,2047,0.2566986680030823
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,2047,0.24200000365575156
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,2047,0.24370666344960532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,2047,0.3367040157318115
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,2047,0.25225067138671875
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,2047,0.24300267299016318
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,2047,0.25702399015426636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,4095,0.4780373175938924
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,4095,0.4691679875055949
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,4095,0.6353919903437296
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,4095,0.4613066514333089
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,4095,0.4602880080540975
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,4095,0.45721598466237384
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,4095,0.46234134833017987
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,4095,0.45892266432444256
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,4095,0.47667733828226727
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,4095,0.469157338142395
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,4095,0.6374293168385824
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,4095,0.46301865577697754
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,4095,0.45960533618927
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,4095,0.46267199516296387
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,8191,0.8567519982655843
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,8191,0.9238186677296957
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,8191,1.228111982345581
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,8191,0.914090633392334
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,8191,0.9041919708251953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,8191,0.9048799673716227
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,8191,0.9028213024139404
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,8191,0.8581120173136393
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,8191,0.920746644337972
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,8191,0.9144319693247477
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,8191,0.9065813223520914
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,8191,0.9048746426900228
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,8191,0.9038453102111816
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,8191,1.2315253416697185
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,16383,1.6648532549540203
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,16383,1.838762601216634
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,16383,2.5115307172139487
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,16383,1.8232693672180176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,16383,1.8092373212178547
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,16383,1.8089067141215007
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,16383,1.81058136622111
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,16383,1.6644906997680664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,16383,1.8432000478108723
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,16383,2.5082880655924478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,16383,1.8106133143107097
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,16383,1.8181173006693523
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,16383,1.8088960647583008
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,16383,1.81059726079305
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,512,1,1,32767,3.2808958689371743
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,512,1,2,32767,3.582634607950846
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,512,1,4,32767,5.252266565958659
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,512,1,8,32767,3.570005416870117
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,512,1,16,32767,3.5602986017862954
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,512,1,32,32767,3.5517441431681314
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,512,1,64,32767,3.544917424519857
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,512,1,1,32767,3.2745866775512695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,1,0.0645066648721695
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,1,0.09249599774678548
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,1,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,1,0.1153546671072642
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,1,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,1,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,1,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,1,0.0641653339068095
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,1,0.09387200077374776
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,512,1,4,32767,5.232810656229655
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,1,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,1,0.11673067013422649
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,512,1,2,32767,3.5911680857340493
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,1,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,1,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,1,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,3,0.06692266464233398
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,3,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,3,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,3,0.0529120018084844
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,3,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,3,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,3,0.05187733471393585
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,512,1,8,32767,3.552090644836426
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,512,1,32,32767,3.5496959686279297
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,3,0.11706133683522542
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,3,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,512,1,64,32767,3.5409971872965493
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,3,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,512,1,16,32767,3.562666575113932
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,3,0.05324266850948334
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,3,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,3,0.09215999643007915
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,3,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,7,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,7,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,7,0.06348266700903575
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,7,0.05187733471393585
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,7,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,7,0.11776533722877502
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,7,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,7,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,7,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,7,0.06655466556549072
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,7,0.11673600474993388
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,7,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,7,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,7,0.0942133367061615
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,15,0.0536106675863266
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,15,0.0532533327738444
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,15,0.06621333460013072
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,15,0.1160533328851064
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,15,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,15,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,15,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,15,0.09386666615804036
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,15,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,15,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,15,0.11980799833933513
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,15,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,15,0.09386666615804036
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,15,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,31,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,31,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,31,0.11673067013422649
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,31,0.06621333460013072
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,31,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,31,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,31,0.09318400422732036
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,31,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,31,0.11570666233698527
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,31,0.05356266597906748
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,31,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,31,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,31,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,31,0.09283733367919922
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,63,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,63,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,63,0.0682773341735204
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,63,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,63,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,63,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,63,0.09318400422732036
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,63,0.05393599967161814
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,63,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,63,0.0689386675755183
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,63,0.11567999919255574
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,63,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,63,0.052298665046691895
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,127,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,127,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,127,0.061093335350354515
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,63,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,127,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,127,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,127,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,127,0.09522666533788045
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,127,0.1181066632270813
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,127,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,127,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,127,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,127,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,127,0.06894400219122569
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,127,0.09557333588600159
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,255,0.07407466570536296
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,255,0.08328000207742055
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,255,0.07133866846561432
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,255,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,255,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,255,0.1013813316822052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,255,0.13499733805656433
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,255,0.07406400144100189
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,255,0.08362666765848796
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,255,0.1225386659304301
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,255,0.07303999861081441
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,255,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,255,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,255,0.13346133629480997
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,511,0.14301333824793497
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,511,0.1358506679534912
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,511,0.20736000935236612
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,511,0.13140799601872763
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,511,0.15106133619944254
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,511,0.18278932571411133
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,511,0.13329600294431052
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,511,0.1430239975452423
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,511,0.12971199552218118
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,511,0.1327839990456899
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,511,0.207370658715566
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,511,0.13243732849756876
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,511,0.18006932735443115
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,511,0.15002133448918661
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,1023,0.23278399308522543
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,1023,0.2317919929822286
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,1023,0.2362026572227478
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,1023,0.24609599510828653
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,1023,0.3551520109176636
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,1023,0.2539520064989726
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,1023,0.2879146734873454
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,1023,0.2868906656901042
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,1023,0.2303946614265442
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,1023,0.35447466373443604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,1023,0.23380800088246664
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,1023,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,1023,0.24473067124684653
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,1023,0.25668267409006756
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,2047,0.4384266535441081
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,2047,0.4469706614812215
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,2047,0.6560373306274414
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,2047,0.4312746524810791
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,2047,0.4381066560745239
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,2047,0.49510399500528973
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,2047,0.4599413474400838
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,2047,0.49544533093770343
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,2047,0.4360479911168416
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,2047,0.4479946692784627
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,2047,0.43705066045125324
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,2047,0.65775465965271
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,2047,0.45925867557525635
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,2047,0.4326346715291341
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,4095,0.8959946632385254
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,4095,0.8717652956644694
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,4095,0.8509439627329508
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,4095,0.8587946891784668
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,4095,1.2492799758911133
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,4095,0.8461706638336182
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,4095,0.8533226648966471
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,4095,0.8939519723256429
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,4095,1.2526933352152507
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,4095,0.8710827032725016
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,4095,0.8584427038828532
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,4095,0.8516266345977783
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,4095,0.8437706629435221
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,4095,0.8465279738108317
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,8191,1.6998400688171387
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,8191,1.7022347450256348
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,8191,2.4301279385884604
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,8191,1.7039413452148438
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,8191,1.679701328277588
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,8191,1.6913013458251953
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,8191,1.6885813077290852
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,8191,1.6964319547017415
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,8191,1.7123039563496907
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,8191,2.4390080769856772
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,8191,1.6957440376281738
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,8191,1.679690678914388
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,8191,1.6878933906555176
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,8191,1.6896053949991863
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,128,1024,1,1,16383,3.313664118448893
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,64,1024,1,2,16383,3.4416640599568686
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,8,1024,1,16,16383,3.382783889770508
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,4,1024,1,32,16383,3.3885812759399414
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,2,1024,1,64,16383,3.3937066396077475
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,32,1024,1,4,16383,5.026303927103679
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,fp8,16,1024,1,8,16383,3.391317367553711
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,128,1024,1,1,16383,3.3123038609822593
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,32,1024,1,4,16383,4.984997431437175
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,64,1024,1,2,16383,3.4150400161743164
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,4,1024,1,32,16383,3.400885264078776
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,16,1024,1,8,16383,3.3978026707967124
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,8,1024,1,16,16383,3.3937012354532876
TRTLLM,1.0.0rc6,NVIDIA B200,mla_generation,default,float16,float16,2,1024,1,64,16383,3.405658721923828
