framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,16,1,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,16,2,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,16,4,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,16,16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,16,8,0,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,16,32,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,16,64,0,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,16,128,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,16,1,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,16,2,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,16,4,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,16,8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,16,16,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,16,32,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,16,128,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,32,4,0,0.03027733415365219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,16,64,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,32,64,0,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,32,1,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,32,2,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,32,128,0,0.01423466702302297
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,32,16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,32,8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,32,32,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,32,1,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,32,2,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,32,4,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,32,8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,32,16,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,32,32,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,32,64,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,32,128,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,64,1,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,64,2,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,64,4,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,64,8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,64,16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,64,32,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,64,64,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,64,128,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,64,1,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,64,2,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,64,4,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,64,8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,128,1,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,64,16,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,64,32,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,64,64,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,64,128,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,128,32,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,128,2,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,128,4,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,128,8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,128,16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,128,64,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,128,128,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,128,1,0,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,128,2,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,128,4,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,128,8,0,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,128,16,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,128,32,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,128,64,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,128,128,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,256,1,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,256,2,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,256,4,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,256,1,0,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,256,8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,256,4,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,256,16,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,256,32,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,256,32,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,256,64,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,256,128,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,256,2,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,256,8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,256,16,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,256,64,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,256,128,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,512,1,0,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,512,2,0,0.026170666019121807
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,512,4,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,512,8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,512,16,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,512,32,0,0.023525332411130268
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,512,64,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,512,128,0,0.02231466770172119
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,512,1,0,0.061861331264177956
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,512,2,0,0.03310399999221166
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,512,64,0,0.02703999976317088
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,512,4,0,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,512,8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,1024,2,0,0.053904001911481224
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,1024,4,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,512,16,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,512,32,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,512,128,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,1024,1,0,0.08672533432642619
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,1024,8,0,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,1024,128,0,0.039221333960692085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,1024,16,0,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,1024,32,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,1024,64,0,0.029696000119050343
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,1024,1,0,0.11834667126337688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,1024,2,0,0.07201066613197327
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,1024,4,0,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,1024,128,0,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,1024,8,0,0.034586665530999504
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,1024,16,0,0.035973332822322845
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,1024,32,0,0.03254933406909307
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,1024,64,0,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,1536,1,0,0.14220800002415976
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,1536,2,0,0.08940266569455464
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,1536,4,0,0.05516799787680308
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,1536,8,0,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,1536,16,0,0.03769599894682566
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,1536,32,0,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,1536,64,0,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,1536,128,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,1536,1,0,0.1859253247578939
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,1536,2,0,0.11364266276359558
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,1536,4,0,0.06651199857393901
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,1536,8,0,0.04238933324813843
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,1536,16,0,0.04005333284536997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,1536,32,0,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,1536,128,0,0.0378560001651446
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,1536,64,0,0.03958933303753535
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,2048,1,0,0.21256534258524576
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,2048,64,0,0.04345599810282389
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,2048,2,0,0.12584533294041952
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,2048,4,0,0.07849066456158955
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,2048,2,0,0.14804800351460776
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,2048,8,0,0.04876266419887543
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,2048,16,0,0.04554666578769684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,2048,32,0,0.0436160018046697
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,2048,128,0,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,2048,1,0,0.2589226762453715
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,2048,4,0,0.0935040016969045
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,2048,8,0,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,2048,16,0,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,3072,4,0,0.13192533453305563
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,2048,32,0,0.045647998650868736
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,2048,64,0,0.0444213350613912
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,2048,128,0,0.04433066646258036
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,3072,1,0,0.4028213421503703
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,3072,2,0,0.21762667099634805
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,3072,8,0,0.08406399687131245
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,3072,16,0,0.05957333246866862
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,3072,32,0,0.057989334066708885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,3072,64,0,0.056645333766937256
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,3072,128,0,0.05593599875768026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,3072,1,0,0.43983999888102215
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,3072,2,0,0.24315200249354044
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,3072,4,0,0.14711999893188477
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,3072,8,0,0.08779199918111165
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,3072,16,0,0.05975466469923655
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,4096,1,0,0.6386773188908895
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,3072,32,0,0.05600533386071523
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,3072,64,0,0.05542933444182078
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,3072,128,0,0.054058666030565895
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,4096,2,0,0.34379732608795166
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,4096,4,0,0.19590934117635092
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,4096,8,0,0.12738133470217386
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,4096,16,0,0.0765173335870107
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,4096,32,0,0.07165333131949107
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,4096,64,0,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,4096,128,0,0.06842666864395142
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,4096,1,0,0.6583466529846191
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,4096,64,0,0.06844800213972728
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,4096,2,0,0.3575093348821004
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,4096,4,0,0.20501333475112915
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,4096,8,0,0.1276533305644989
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,4096,16,0,0.07446399827798207
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,6144,16,0,0.13525866468747458
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,4096,32,0,0.0684799998998642
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,4096,128,0,0.06647466619809468
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,6144,1,0,1.289690653483073
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,6144,2,0,0.668725331624349
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,6144,4,0,0.3682560125986735
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,6144,8,0,0.2166986664136251
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,6144,32,0,0.10004799564679463
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,6144,64,0,0.09800533453623454
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,6144,128,0,0.09682666261990865
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,6144,1,0,1.202618678410848
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,6144,2,0,0.6393546660741171
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,6144,4,0,0.3569706678390503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,6144,8,0,0.2119040091832479
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,6144,32,0,0.09314133723576863
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,6144,16,0,0.13387200236320496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,6144,64,0,0.09058133761088054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,6144,128,0,0.08987200260162354
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,8192,1,0,2.1586507161458335
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,8192,2,0,1.1106879711151123
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,8192,4,0,0.5867679913838705
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,8192,8,0,0.3359946807225545
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,8192,16,0,0.21207465728123984
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,8192,32,0,0.132341335217158
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,8192,64,0,0.1260426640510559
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,8192,128,0,0.12387733658154805
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,8192,1,0,1.9445813496907551
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,8192,2,0,0.9938293298085531
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,8192,4,0,0.5419573386510214
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,8192,8,0,0.3172373374303182
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,8192,16,0,0.19915199279785156
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,8192,32,0,0.12030933300654094
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,8192,64,0,0.11513599753379822
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,8192,128,0,0.11369599898656209
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,10240,1,0,3.472112019856771
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,10240,2,0,1.6466827392578125
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,10240,4,0,0.8816800117492676
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,10240,8,0,0.4811946551005046
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,10240,16,0,0.29333333174387616
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,10240,32,0,0.17688000202178955
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,10240,64,0,0.15466133753458658
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,10240,4,0,0.7683573563893636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,10240,128,0,0.15427733461062113
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,10240,16,0,0.270197331905365
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,10240,2,0,1.4253066380818684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,10240,1,0,2.8453919092814126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,10240,128,0,0.1360160013039907
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,10240,8,0,0.43324267864227295
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,10240,32,0,0.16135467092196146
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,10240,64,0,0.14035733540852866
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,12288,4,0,1.190346638361613
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,12288,8,0,0.6485226551691691
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,12288,2,0,2.481162707010905
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,12288,1,0,4.994453430175781
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,12288,16,0,0.3848533233006795
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,12288,32,0,0.240447998046875
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,12288,64,0,0.18333333730697632
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,12288,128,0,0.18050666650136313
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,12288,1,0,3.898127873738607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,12288,2,0,1.937119960784912
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,12288,8,0,0.5738879839579264
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,12288,4,0,1.0410613218943279
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,12288,16,0,0.3447466691335042
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,12288,128,0,0.15893866618474325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,12288,64,0,0.16274133324623108
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,12288,32,0,0.2212000091870626
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,1,16384,4,0,2.08788267771403
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,1,16384,1,0,8.557088216145834
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,1,16384,64,0,0.26577067375183105
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,1,16384,16,0,0.6167680025100708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,1,16384,8,0,1.0732746918996174
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,1,16384,2,0,4.23090140024821
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,1,16384,32,0,0.38552534580230713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,1,16384,128,0,0.24295467138290405
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,1,16384,1,0,6.84885851542155
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,1,16384,2,0,3.216479937235514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,1,16384,4,0,1.6729386647542317
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,1,16384,8,0,0.914031982421875
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,1,16384,16,0,0.5298399925231934
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,1,16384,32,0,0.3380906581878662
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,1,16384,64,0,0.21474667390187582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,16,1,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,1,16384,128,0,0.20683199167251587
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,16,2,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,16,4,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,16,8,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,16,16,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,16,32,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,16,64,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,16,128,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,16,1,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,16,2,0,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,16,4,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,16,8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,16,16,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,16,32,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,16,64,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,16,128,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,32,1,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,32,2,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,32,4,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,32,8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,32,16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,32,32,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,32,64,0,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,32,128,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,32,1,0,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,32,2,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,32,4,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,32,8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,32,16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,32,32,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,32,64,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,32,128,0,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,64,1,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,64,2,0,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,64,4,0,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,64,8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,64,16,0,0.016645333419243496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,64,32,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,64,128,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,64,64,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,64,2,0,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,64,4,0,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,64,1,0,0.028016000986099243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,64,8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,64,16,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,64,32,0,0.021712000171343487
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,64,64,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,64,128,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,128,1,0,0.02369600037733714
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,128,4,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,128,2,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,128,8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,128,16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,128,64,0,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,128,32,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,128,128,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,128,2,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,128,1,0,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,128,4,0,0.022517333428064983
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,128,8,0,0.022490667800108593
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,128,16,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,128,32,0,0.021509334444999695
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,128,128,0,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,128,64,0,0.021727999051411945
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,256,1,0,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,256,2,0,0.0215786670645078
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,256,4,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,256,8,0,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,256,16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,256,32,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,256,64,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,256,128,0,0.022682666778564453
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,256,16,0,0.023589332898457844
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,256,2,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,256,1,0,0.05611733098824819
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,256,4,0,0.026047999660174053
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,256,8,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,256,32,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,256,64,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,256,128,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,512,1,0,0.06816533207893372
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,512,2,0,0.041434665520985924
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,512,4,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,512,8,0,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,512,16,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,512,32,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,512,64,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,512,128,0,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,512,1,0,0.10335999727249146
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,512,64,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,512,2,0,0.06423466900984447
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,1024,1,0,0.1497119963169098
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,512,4,0,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,512,8,0,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,512,16,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,512,32,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,512,128,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,1024,2,0,0.08833066622416179
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,1024,4,0,0.05537599821885427
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,1024,1,0,0.20779200394948324
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,1024,8,0,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,1024,16,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,1024,32,0,0.029578665892283123
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,1024,64,0,0.02979733298222224
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,1024,128,0,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,1024,2,0,0.11738133430480957
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,1024,64,0,0.03387733300526937
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,1024,4,0,0.07207466661930084
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,1536,2,0,0.14363732933998108
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,1024,8,0,0.039664000272750854
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,1024,16,0,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,1024,32,0,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,1024,128,0,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,1536,1,0,0.25860265890757245
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,1536,4,0,0.0888213316599528
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,1536,8,0,0.05462933580080668
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,1536,16,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,1536,32,0,0.03746666759252548
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,1536,64,0,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,1536,128,0,0.03598399957021078
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,1536,32,0,0.039808000127474465
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,1536,1,0,0.3341386715571086
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,1536,2,0,0.18645334243774414
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,1536,4,0,0.11250666777292888
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,1536,8,0,0.06594666838645935
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,1536,16,0,0.043151999513308205
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,1536,64,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,1536,128,0,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,2048,1,0,0.3985866705576579
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,2048,64,0,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,2048,2,0,0.21521600087483725
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,2048,4,0,0.1252906620502472
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,2048,8,0,0.08076266447703044
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,2048,16,0,0.04760533571243286
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,2048,32,0,0.046165332198143005
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,2048,128,0,0.04363200068473816
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,2048,1,0,0.4795253276824951
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,2048,2,0,0.2587573329607646
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,2048,128,0,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,2048,4,0,0.1488053301970164
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,3072,1,0,0.7680373191833496
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,2048,8,0,0.09128533800443013
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,2048,16,0,0.05162666738033295
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,2048,32,0,0.047456001242001854
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,2048,64,0,0.0452106644709905
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,3072,2,0,0.3981226682662964
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,3072,64,0,0.05782933533191681
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,3072,128,0,0.05784533421198527
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,3072,4,0,0.2178773283958435
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,3072,8,0,0.1316106617450714
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,3072,16,0,0.08474666873613994
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,3072,32,0,0.06051200131575266
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,3072,1,0,0.8307092984517416
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,3072,2,0,0.43963201840718585
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,3072,4,0,0.24486400683720908
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,4096,1,0,1.2432586352030437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,3072,8,0,0.14643200238545737
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,3072,16,0,0.09053867061932881
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,3072,32,0,0.060080001751581825
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,3072,64,0,0.05766933163007101
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,3072,128,0,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,4096,2,0,0.639413317044576
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,4096,4,0,0.34271466732025146
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,4096,8,0,0.19709332784016928
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,4096,16,0,0.1295093297958374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,4096,32,0,0.07691200077533722
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,4096,64,0,0.0732426643371582
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,4096,16,0,0.12955733140309653
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,4096,128,0,0.0708426684141159
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,4096,1,0,1.2678240140279133
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,4096,2,0,0.6624053319295248
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,4096,4,0,0.3585333426793416
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,4096,8,0,0.20441067218780518
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,4096,32,0,0.07644799848397572
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,4096,64,0,0.07041599849859874
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,4096,128,0,0.06804800033569336
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,6144,1,0,2.6436479886372886
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,6144,2,0,1.2836000124613445
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,6144,4,0,0.6696159839630127
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,6144,8,0,0.365392009417216
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,6144,16,0,0.2189813256263733
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,6144,32,0,0.1397226651509603
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,6144,64,0,0.10283199946085612
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,6144,128,0,0.09898666540781657
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,6144,1,0,2.3423360188802085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,6144,2,0,1.2083199818929036
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,6144,4,0,0.6396960020065308
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,6144,8,0,0.3569973309834798
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,6144,16,0,0.21455466747283936
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,6144,32,0,0.13763733704884848
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,6144,64,0,0.09715200463930766
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,6144,128,0,0.09269866347312927
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,8192,1,0,4.618762652079265
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,8192,2,0,2.1458560625712075
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,8192,4,0,1.10045329729716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,8192,8,0,0.5900426705678304
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,8192,16,0,0.3370933135350545
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,8192,4,0,1.0003680388132732
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,8192,8,0,0.5436000029246012
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,8192,32,0,0.21547200282414755
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,8192,64,0,0.14662933349609375
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,8192,128,0,0.12971733013788858
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,8192,1,0,3.872351964314779
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,8192,2,0,1.9072052637736003
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,8192,16,0,0.31674667199452716
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,10240,2,0,3.57258669535319
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,10240,4,0,1.661077340443929
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,8192,64,0,0.12598400314648947
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,8192,32,0,0.20402133464813232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,8192,128,0,0.11760000387827556
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,10240,1,0,7.309279759724935
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,10240,8,0,0.8636159896850586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,10240,16,0,0.4814773400624593
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,10240,32,0,0.2983413338661194
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,10240,64,0,0.1865546703338623
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,10240,128,0,0.16848532358805338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,10240,4,0,1.455674648284912
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,10240,1,0,5.866591771443685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,10240,2,0,2.925413449605306
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,10240,16,0,0.43908266226450604
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,10240,8,0,0.768608013788859
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,10240,32,0,0.2725600004196167
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,12288,1,0,11.458880106608072
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,10240,64,0,0.16916799545288086
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,10240,128,0,0.1439786652723948
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,12288,2,0,4.842421213785808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,12288,8,0,1.1996320088704426
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,12288,4,0,2.4819893836975098
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,12288,16,0,0.6516853173573812
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,12288,32,0,0.3875573476155599
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,12288,64,0,0.24478399753570557
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,12288,1,0,8.057530721028646
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,12288,32,0,0.35150400797526044
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,12288,128,0,0.2096959948539734
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,12288,2,0,3.972655932108561
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,12288,4,0,1.9674933751424153
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,12288,8,0,1.035871982574463
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,12288,16,0,0.5804373423258463
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,12288,64,0,0.2302186687787374
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,12288,128,0,0.17100266615549722
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,2,16384,8,0,2.125973383585612
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,2,16384,64,0,0.3906826575597127
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,2,16384,2,0,8.615834554036459
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,2,16384,16,0,1.1066880226135254
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,2,16384,4,0,3.995295842488607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,2,16384,32,0,0.6130186716715494
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,2,16384,128,0,0.2823093334833781
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,2,16384,1,0,18.085514068603516
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,2,16384,4,0,3.39680544535319
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,2,16384,16,0,0.9165066878000895
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,2,16384,2,0,7.462464014689128
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,2,16384,8,0,1.6711254119873047
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,2,16384,128,0,0.22445333003997803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,2,16384,32,0,0.5351146856943766
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,2,16384,64,0,0.3410613139470418
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,2,16384,1,0,13.716164906819662
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,16,1,0,0.02565866708755493
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,16,8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,16,2,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,16,4,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,16,16,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,16,32,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,16,64,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,16,128,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,16,2,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,16,1,0,0.030586667358875275
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,16,4,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,16,8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,16,16,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,16,32,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,16,64,0,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,16,128,0,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,32,1,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,32,2,0,0.019679999599854153
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,32,4,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,32,64,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,32,8,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,32,16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,32,32,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,32,1,0,0.03183999905983607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,32,128,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,32,2,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,32,8,0,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,32,4,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,32,16,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,32,64,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,32,32,0,0.02165866643190384
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,32,128,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,64,4,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,64,2,0,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,64,1,0,0.032586666444937386
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,64,32,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,64,64,0,0.01578666642308235
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,64,8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,64,16,0,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,64,128,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,64,2,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,64,1,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,64,4,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,64,8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,64,16,0,0.021589333812395733
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,64,32,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,64,64,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,128,1,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,64,128,0,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,128,2,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,128,8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,128,4,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,128,16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,128,64,0,0.02013333390156428
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,128,32,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,128,128,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,128,1,0,0.0601440022389094
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,128,2,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,128,4,0,0.023775999744733173
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,128,8,0,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,128,16,0,0.021690666675567627
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,128,32,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,128,64,0,0.021498667697111767
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,128,128,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,256,1,0,0.060677334666252136
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,256,2,0,0.03392533212900162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,256,4,0,0.023525332411130268
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,256,8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,256,16,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,256,32,0,0.02025066688656807
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,256,64,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,256,128,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,256,1,0,0.09567466378211975
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,256,2,0,0.05638933181762695
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,256,4,0,0.031061333914597828
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,256,8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,256,32,0,0.024714666108290356
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,256,16,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,256,64,0,0.023946667710940044
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,512,1,0,0.11720533172289531
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,256,128,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,512,32,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,512,2,0,0.06763733426729839
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,512,128,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,512,4,0,0.042506664991378784
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,512,8,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,512,16,0,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,512,64,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,512,1,0,0.18153599898020426
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,512,2,0,0.10283733407656352
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,512,4,0,0.06404800216356914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,512,8,0,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,512,32,0,0.03242666771014532
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,512,16,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,512,64,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,512,128,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,1024,1,0,0.27703466018040973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,1024,2,0,0.15032000343004862
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,1024,4,0,0.08747733632723491
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,1024,8,0,0.05555733541647593
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,1024,16,0,0.0339626669883728
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,1024,32,0,0.03204799940188726
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,1024,64,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,1024,128,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,1024,1,0,0.38631999492645264
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,1024,2,0,0.20814400911331177
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,1024,4,0,0.11991999546686809
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,1024,8,0,0.07437866429487865
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,1024,16,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,1024,32,0,0.035445332527160645
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,1024,64,0,0.03551466763019562
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,1024,128,0,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,1536,1,0,0.4970133304595947
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,1536,2,0,0.2606026728947957
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,1536,4,0,0.14428800344467163
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,1536,8,0,0.0913866659005483
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,1536,16,0,0.05606399973233541
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,1536,32,0,0.03955733279387156
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,1536,64,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,1536,128,0,0.038165333370367684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,1536,1,0,0.6340159972508749
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,1536,2,0,0.3350133498509725
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,1536,4,0,0.18548800547917685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,1536,8,0,0.11250666777292888
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,1536,16,0,0.0683840016523997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,1536,32,0,0.04286933441956838
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,2048,16,0,0.08211199939250946
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,1536,64,0,0.04168533285458883
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,1536,128,0,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,2048,1,0,0.7766933441162109
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,2048,2,0,0.40083734194437665
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,2048,4,0,0.2152106761932373
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,2048,8,0,0.12571199735005698
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,2048,32,0,0.04964800179004669
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,2048,64,0,0.04595733185609182
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,2048,128,0,0.04590400060017904
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,2048,1,0,0.9240907033284506
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,2048,2,0,0.4802773396174113
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,2048,4,0,0.25945067405700684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,2048,8,0,0.151146670182546
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,2048,16,0,0.09333866834640503
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,3072,8,0,0.22012799978256226
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,2048,32,0,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,2048,64,0,0.047295997540156044
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,2048,128,0,0.04624533156553904
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,3072,1,0,1.4989387194315593
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,3072,2,0,0.7708746592203776
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,3072,4,0,0.40268266201019287
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,3072,16,0,0.13333333532015482
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,3072,32,0,0.0881119966506958
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,3072,64,0,0.06406400104363759
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,3072,128,0,0.06644266843795776
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,3072,8,0,0.24540799856185913
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,3072,1,0,1.6213599840799968
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,3072,2,0,0.8345332940419515
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,3072,4,0,0.4402666489283244
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,3072,16,0,0.14874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,3072,32,0,0.09325333436330159
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,3072,64,0,0.062314664324124656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,3072,128,0,0.05991999804973602
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,4096,1,0,2.6873652140299478
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,4096,2,0,1.243232011795044
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,4096,4,0,0.642303983370463
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,4096,8,0,0.3473333517710368
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,4096,16,0,0.19983466466267905
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,4096,32,0,0.13152533769607544
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,4096,64,0,0.08706667025883992
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,4096,128,0,0.0748586654663086
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,4096,1,0,2.582869370778402
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,4096,2,0,1.2730080286661785
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,4096,4,0,0.6738293170928955
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,4096,8,0,0.36638398965199787
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,4096,16,0,0.20840533574422201
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,4096,32,0,0.13286933302879333
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,4096,64,0,0.0800799975792567
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,4096,128,0,0.07254933317502339
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,6144,1,0,5.521034876505534
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,6144,4,0,1.2879892985026042
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,6144,2,0,2.69102414449056
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,6144,16,0,0.3710453510284424
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,6144,8,0,0.6789759794871012
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,6144,32,0,0.2232746680577596
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,6144,64,0,0.14776532848676047
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,6144,128,0,0.11857600013415019
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,6144,2,0,2.3770079612731934
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,6144,1,0,4.996629397074382
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,6144,4,0,1.2115840117136638
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,6144,8,0,0.6414293448130289
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,6144,16,0,0.36125866572062176
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,6144,32,0,0.21840532620747885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,6144,64,0,0.14177599549293518
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,6144,128,0,0.10318400462468465
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,4,8192,1,0,9.428410847981771
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,4,8192,2,0,4.844677289326985
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,4,8192,128,0,0.15904532869656882
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,4,8192,4,0,2.2564160029093423
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,4,8192,8,0,1.1149439811706543
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,4,8192,16,0,0.6036586761474609
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,4,8192,32,0,0.34358398119608563
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,4,8192,64,0,0.22202134132385254
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,4,8192,2,0,4.112074534098308
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,4,8192,8,0,1.0047093232472737
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,4,8192,4,0,1.942314624786377
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,4,8192,16,0,0.5479520161946615
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,4,8192,1,0,7.966698964436849
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,4,8192,64,0,0.20985066890716553
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,4,8192,32,0,0.3187946677207947
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,4,8192,128,0,0.13622933626174927
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,16,1,0,0.03734933336575826
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,16,2,0,0.02651199946800868
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,16,4,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,16,8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,16,16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,16,32,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,16,64,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,16,128,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,16,1,0,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,16,2,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,16,4,0,0.025989333788553875
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,16,8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,16,16,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,16,32,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,16,64,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,16,128,0,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,32,1,0,0.03889599939187368
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,32,2,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,32,64,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,32,4,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,32,8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,32,16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,32,32,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,32,128,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,32,1,0,0.04655999938646952
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,32,2,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,32,4,0,0.03392533212900162
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,32,8,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,32,16,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,32,32,0,0.020549333343903225
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,32,64,0,0.02067733307679494
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,32,128,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,64,1,0,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,64,2,0,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,64,4,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,64,8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,64,16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,64,32,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,64,64,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,64,128,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,64,1,0,0.06829866766929626
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,64,2,0,0.042634665966033936
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,64,4,0,0.025727999707063038
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,64,8,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,64,32,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,64,16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,64,64,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,64,128,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,128,1,0,0.05938133100668589
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,128,2,0,0.03700799991687139
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,128,4,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,128,8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,128,16,0,0.036015999813874565
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,128,32,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,128,64,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,128,128,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,128,1,0,0.09910399715105693
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,128,2,0,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,128,4,0,0.03187733391920725
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,128,8,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,128,16,0,0.02162133405605952
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,128,32,0,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,128,128,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,128,64,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,256,1,0,0.10073066751162212
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,256,2,0,0.06018666426340739
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,256,4,0,0.034688000877698265
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,256,8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,256,16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,256,32,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,256,64,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,256,128,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,256,1,0,0.16643200318018594
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,256,2,0,0.09692266583442688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,256,4,0,0.0573226660490036
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,256,8,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,256,16,0,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,256,32,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,256,64,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,512,8,0,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,256,128,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,512,1,0,0.21580266952514648
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,512,2,0,0.11827199657758077
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,512,4,0,0.0674773355325063
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,512,16,0,0.027978666126728058
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,512,8,0,0.06266666452089946
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,512,32,0,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,512,64,0,0.02610666553179423
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,512,128,0,0.024149333437283833
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,512,1,0,0.3375146786371867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,512,2,0,0.18227734168370566
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,512,4,0,0.1053546667098999
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,512,16,0,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,1024,1,0,0.5369066794713339
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,512,32,0,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,512,64,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,512,128,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,1024,2,0,0.2785653273264567
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,1024,4,0,0.15065067013104758
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,1024,8,0,0.08964799841245015
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,1024,16,0,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,1024,32,0,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,1024,64,0,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,1024,128,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,1024,1,0,0.7445493539174398
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,1024,2,0,0.3905866543451945
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,1024,4,0,0.20832000176111856
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,1024,8,0,0.1192586620648702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,1024,16,0,0.075013334552447
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,1024,32,0,0.04095466683308283
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,1024,64,0,0.03782933453718821
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,1024,128,0,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,1536,1,0,1.0032479763031006
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,1536,2,0,0.4994560082753499
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,1536,64,0,0.041850666205088295
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,1536,4,0,0.2619679967562358
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,1536,8,0,0.1474026640256246
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,1536,2,0,0.6386186679204305
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,1536,16,0,0.09157333771387736
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,1536,32,0,0.060933331648508705
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,1536,128,0,0.03995199998219808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,1536,1,0,1.2374133268992107
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,1536,4,0,0.33589335282643634
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,1536,8,0,0.18732800086339316
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,1536,16,0,0.11547199885050456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,1536,32,0,0.07389866809050243
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,1536,64,0,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,1536,128,0,0.04385599990685781
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,2048,16,0,0.1302186648050944
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,2048,2,0,0.7988213698069254
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,2048,1,0,1.58186674118042
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,2048,4,0,0.4066506624221802
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,2048,8,0,0.21966399749120077
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,2048,32,0,0.0869706670443217
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,2048,64,0,0.055776000022888184
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,2048,128,0,0.048058668772379555
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,2048,1,0,1.8196533521016438
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,2048,2,0,0.928447961807251
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,2048,4,0,0.4844213326772054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,2048,8,0,0.2622399926185608
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,2048,16,0,0.15242133537928262
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,2048,32,0,0.09726400176684062
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,2048,64,0,0.058277333776156105
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,2048,128,0,0.04990933338801066
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,3072,1,0,2.9904534022013345
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,3072,2,0,1.5645599365234375
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,3072,4,0,0.7761867046356201
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,3072,8,0,0.4091200033823649
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,3072,16,0,0.22710400819778442
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,3072,32,0,0.1400106648604075
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,3072,64,0,0.09423999985059102
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,3072,128,0,0.072543998559316
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,3072,1,0,3.208730697631836
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,3072,2,0,1.635423978169759
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,3072,4,0,0.8405066331227621
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,3072,8,0,0.4436746835708618
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,3072,16,0,0.2497119903564453
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,3072,32,0,0.15203733245531717
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,3072,64,0,0.09954133629798889
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,3072,128,0,0.06842666864395142
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,8,4096,2,0,3.2186400095621743
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,8,4096,4,0,1.2618079980214436
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,8,4096,1,0,5.477994918823242
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,8,4096,8,0,0.6588053305943807
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,8,4096,16,0,0.3523413340250651
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,8,4096,32,0,0.20726933081944784
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,8,4096,2,0,2.51692803700765
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,8,4096,64,0,0.14032000303268433
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,8,4096,16,0,0.36557332674662274
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,8,4096,128,0,0.09728533029556274
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,8,4096,4,0,1.2786773045857747
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,8,4096,8,0,0.6691626707712809
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,8,4096,1,0,5.5574080149332685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,8,4096,32,0,0.214303990205129
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,8,4096,64,0,0.14429333806037903
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,8,4096,128,0,0.09084266424179077
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,16,16,1,0,0.06027733286221822
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,16,16,2,0,0.03755733370780945
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,16,16,4,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,16,16,8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,16,16,16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,16,16,32,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,16,16,64,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,16,16,128,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,16,16,1,0,0.06631466746330261
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,16,16,2,0,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,16,16,4,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,16,16,8,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,16,16,16,0,0.020848001043001812
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,16,32,4,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,16,32,16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,16,32,32,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,16,16,32,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,16,16,64,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,16,16,128,0,0.019727999965349834
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,16,32,1,0,0.06810133159160614
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,16,32,2,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,16,32,8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,16,32,64,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,16,32,128,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,16,32,1,0,0.08592533071835835
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,16,32,2,0,0.04458666841189066
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,16,32,4,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,16,64,1,0,0.07967466612656911
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,16,32,8,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,16,32,16,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,16,32,32,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,16,32,64,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,16,32,128,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,16,64,2,0,0.04751466711362203
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,16,64,4,0,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,16,64,8,0,0.02163733293612798
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,16,64,16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,16,64,32,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,16,64,64,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,16,64,32,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,16,64,128,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,16,64,1,0,0.11902933319409688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,16,64,128,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,16,64,2,0,0.06816533207893372
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,16,64,4,0,0.03751466671625773
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,16,128,4,0,0.037621334195137024
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,16,64,8,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,16,64,16,0,0.021727999051411945
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,16,64,64,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,16,128,1,0,0.1039466659228007
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,16,128,2,0,0.06038933495680491
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,16,128,1,0,0.17678932348887125
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,16,128,8,0,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,16,128,16,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,16,128,32,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,16,128,64,0,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,16,128,128,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,16,128,2,0,0.09924266735712688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,16,128,4,0,0.06020266811052958
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,16,128,8,0,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,16,128,32,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,16,128,16,0,0.02459733436505
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,16,128,64,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,16,128,128,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,16,256,1,0,0.1881600022315979
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,16,256,2,0,0.10162666440010071
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,16,256,4,0,0.060906668504079185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,16,256,8,0,0.03585600107908249
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,16,256,32,0,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,16,256,16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,16,256,64,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,16,256,128,0,0.020207999895016353
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,16,256,1,0,0.3105600078900655
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,16,256,2,0,0.1665013333161672
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,16,256,4,0,0.09701333443323772
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,16,256,8,0,0.0583840012550354
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,16,256,16,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,16,256,32,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,16,256,64,0,0.026474667092164356
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,16,256,128,0,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,16,512,2,0,0.21780800819396973
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,16,512,1,0,0.417141318321228
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,16,512,4,0,0.12001066406567891
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,16,512,8,0,0.07069333394368489
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,16,512,16,0,0.04578666885693868
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,16,512,32,0,0.027893332143624622
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,16,512,64,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,16,512,128,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,16,512,2,0,0.33905065059661865
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,16,512,64,0,0.031983998914559685
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,16,512,1,0,0.6498986482620239
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,16,512,4,0,0.18345065911610922
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,16,1024,1,0,1.084544022878011
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,16,1024,2,0,0.5405813455581665
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,16,1024,4,0,0.2868799964586894
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,16,512,8,0,0.103493332862854
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,16,512,16,0,0.06645866731802623
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,16,512,32,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,16,512,128,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,16,1024,1,0,1.479050636291504
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,16,1024,8,0,0.15546666582425436
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,16,1024,16,0,0.09408533573150635
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,16,1024,32,0,0.06222933530807495
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,16,1024,64,0,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,16,1024,128,0,0.035258665680885315
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,16,1024,2,0,0.7528479894002279
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,16,1024,4,0,0.3941226800282796
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,16,1024,8,0,0.212336003780365
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,16,1024,16,0,0.12351466218630473
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,16,1024,32,0,0.07901333272457123
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,16,1024,64,0,0.04563733438650767
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,16,1024,128,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,16,1536,1,0,1.9621814092000325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,16,1536,2,0,0.9856266975402832
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,16,1536,4,0,0.5058773358662924
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,16,1536,8,0,0.2675146659215291
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,16,1536,16,0,0.1534880002339681
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,16,1536,32,0,0.09777067104975383
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,16,1536,64,0,0.06807999809583028
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,16,1536,128,0,0.05003199974695841
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,16,1536,1,0,2.470208009084066
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,16,1536,2,0,1.250432014465332
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,16,1536,4,0,0.642746647198995
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,16,1536,8,0,0.34276799360911053
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,16,1536,16,0,0.19363200664520264
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,16,1536,32,0,0.1192586620648702
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,16,1536,128,0,0.052704001466433205
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,16,1536,64,0,0.07833600044250488
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,16,2048,2,0,1.5495519638061523
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,16,2048,1,0,3.208277384440104
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,16,2048,4,0,0.791914701461792
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,16,2048,8,0,0.4095573425292969
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,16,2048,16,0,0.227183997631073
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,16,2048,32,0,0.1383519967397054
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,16,2048,64,0,0.09357333183288574
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,16,2048,1,0,3.6195414861043296
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,16,2048,128,0,0.06769066552321117
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,16,2048,4,0,0.9413119951883951
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,16,2048,2,0,1.8300693829854329
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,16,2048,8,0,0.49196799596150714
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,16,2048,16,0,0.2696106632550557
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,16,2048,32,0,0.1585493286450704
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,16,2048,64,0,0.10439466436704
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,32,16,1,0,0.1120746632417043
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,16,2048,128,0,0.06765333314736684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,32,16,2,0,0.06050133208433787
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,32,16,4,0,0.03828266759713491
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,32,16,8,0,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,32,16,16,0,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,32,16,128,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,32,16,64,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,32,16,32,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,32,16,1,0,0.1270240048567454
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,32,16,2,0,0.06594133377075195
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,32,16,8,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,32,16,4,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,32,16,16,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,32,16,32,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,32,32,2,0,0.06859733164310455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,32,16,64,0,0.020138667275508244
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,32,16,128,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,32,32,1,0,0.1209386686484019
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,32,32,8,0,0.027610667049884796
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,32,32,4,0,0.039877332746982574
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,32,32,16,0,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,32,32,32,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,32,32,64,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,32,32,128,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,32,32,2,0,0.08611733714739482
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,32,32,1,0,0.15262400110562643
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,32,32,32,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,32,32,4,0,0.046336000164349876
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,32,32,16,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,32,32,64,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,32,32,128,0,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,32,32,8,0,0.03230399886767069
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,32,64,1,0,0.1423679987589518
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,32,64,2,0,0.07884266475836436
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,32,64,4,0,0.04757866760094961
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,32,64,8,0,0.029877332349618275
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,32,64,32,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,32,64,16,0,0.022240000466505688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,32,64,64,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,32,64,128,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,32,64,1,0,0.21643733978271484
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,32,64,2,0,0.1181173324584961
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,32,64,4,0,0.06751466790835063
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,32,64,128,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,32,128,1,0,0.19500267505645752
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,32,64,16,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,32,64,8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,32,64,32,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,32,64,64,0,0.021690666675567627
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,32,128,2,0,0.10363733768463135
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,32,128,4,0,0.060405333836873375
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,32,128,8,0,0.04747733473777771
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,32,128,16,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,32,128,32,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,32,128,64,0,0.01821333294113477
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,32,128,128,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,32,128,16,0,0.03376533339420954
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,32,128,1,0,0.3349653482437134
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,32,128,2,0,0.17689067125320435
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,32,128,4,0,0.10037866234779358
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,32,128,8,0,0.06171200176080068
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,32,128,64,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,32,128,32,0,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,32,128,128,0,0.021514666577180225
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,32,256,1,0,0.3630880117416382
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,32,256,2,0,0.19202667474746704
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,32,256,4,0,0.10598400235176086
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,32,256,64,0,0.021530665457248688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,32,256,8,0,0.06323733429114024
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,32,256,32,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,32,256,16,0,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,32,256,128,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,32,256,1,0,0.6029866536458334
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,32,256,2,0,0.312394658724467
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,32,256,8,0,0.09734400113423665
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,32,256,32,0,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,32,256,4,0,0.16901866594950357
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,32,256,16,0,0.06049066781997681
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,32,256,64,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,32,512,1,0,0.840949296951294
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,32,256,128,0,0.025754667818546295
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,32,512,2,0,0.4206453164418538
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,32,512,4,0,0.22221867243448892
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,32,512,16,0,0.07643199960390727
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,32,512,8,0,0.12598400314648947
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,32,512,64,0,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,32,512,32,0,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,32,512,128,0,0.02808533360560735
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,32,512,1,0,1.2888586521148682
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,32,512,2,0,0.657263994216919
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,32,512,4,0,0.34299735228220624
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,32,512,8,0,0.1849493384361267
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,32,512,16,0,0.10873599847157796
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,32,512,32,0,0.0683840016523997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,32,512,64,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,32,512,128,0,0.03387200087308884
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,32,1024,1,0,2.1511093775431314
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,32,1024,2,0,1.0823893547058105
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,32,1024,64,0,0.07105599840482076
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,32,1024,4,0,0.545253316561381
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,32,1024,8,0,0.28730666637420654
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,32,1024,16,0,0.16428800423940024
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,32,1024,32,0,0.10833066701889038
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,32,1024,128,0,0.05099200208981832
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,32,1024,2,0,1.478933334350586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,32,1024,1,0,2.9309120178222656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,32,1024,128,0,0.056314667065938316
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,32,1024,4,0,0.7588533560434977
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,32,1024,8,0,0.39768532911936444
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,32,1024,16,0,0.21791466077168783
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,32,1024,32,0,0.12850133577982584
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,32,1024,64,0,0.08581866820653279
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,64,16,8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,64,16,1,0,0.20271466175715128
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,64,16,2,0,0.11178666353225708
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,64,16,4,0,0.060602664947509766
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,64,16,1,0,0.23310399055480957
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,64,16,2,0,0.1288533310095469
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,64,16,4,0,0.06644799808661143
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,64,16,32,0,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,64,16,16,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,64,16,64,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,64,16,128,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,64,16,8,0,0.04196799794832865
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,64,16,16,0,0.030213333666324615
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,64,16,32,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,64,16,64,0,0.024192000428835552
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,64,16,128,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,64,32,1,0,0.22378667195638022
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,64,32,64,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,64,32,32,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,64,32,2,0,0.12195199728012085
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,64,32,4,0,0.06832000116507213
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,64,32,8,0,0.0408746674656868
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,64,32,16,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,64,32,128,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,64,32,1,0,0.28438933690388996
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,64,32,2,0,0.1520906686782837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,64,32,4,0,0.08557333548863728
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,64,32,8,0,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,64,32,16,0,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,64,32,32,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,64,32,64,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,64,32,128,0,0.02182399978240331
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,64,64,1,0,0.27566399176915485
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,64,64,2,0,0.14222932855288187
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,64,64,4,0,0.07890666524569194
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,64,64,8,0,0.047541335225105286
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,64,64,16,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,64,64,32,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,64,64,64,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,64,64,128,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,64,64,1,0,0.41226665178934735
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,64,64,2,0,0.21585599581400552
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,64,64,4,0,0.11785067121187846
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,64,64,8,0,0.06956799825032552
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,64,64,16,0,0.03835200021664301
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,64,64,32,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,64,64,64,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,64,64,128,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,64,128,16,0,0.04022933294375738
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,64,128,1,0,0.3760106563568115
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,64,128,2,0,0.1986400087674459
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,64,128,4,0,0.1060746709505717
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,64,128,8,0,0.06242666641871134
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,64,128,32,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,64,128,64,0,0.019813333948453266
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,64,128,128,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,64,128,1,0,0.6488906542460123
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,64,128,2,0,0.3352959950764974
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,64,128,4,0,0.18042665719985962
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,64,128,8,0,0.10097600022951762
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,64,128,16,0,0.06965866684913635
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,64,128,32,0,0.03469866762558619
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,64,128,64,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,64,128,128,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,64,256,1,0,0.7170506318410238
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,64,256,64,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,64,256,2,0,0.3652533292770386
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,64,256,4,0,0.19505600134531656
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,64,256,8,0,0.10790399710337321
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,64,256,16,0,0.06817600131034851
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,64,256,32,0,0.04309333364168803
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,64,256,128,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,64,256,1,0,1.197482665379842
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,64,256,2,0,0.6083306471506754
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,64,256,4,0,0.31827733914057416
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,64,256,8,0,0.17083199818929037
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,64,256,16,0,0.10155733426411946
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,64,256,32,0,0.06361599763234456
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,64,256,64,0,0.039093332986036934
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,64,256,128,0,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,64,512,1,0,1.642661412556966
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,64,512,2,0,0.8298453489939371
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,64,512,128,0,0.04275199770927429
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,64,512,4,0,0.42501866817474365
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,64,512,2,0,1.296768029530843
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,64,512,8,0,0.2281493345896403
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,64,512,16,0,0.13221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,64,512,32,0,0.0831413318713506
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,64,512,64,0,0.0598880002895991
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,64,512,1,0,2.5741492907206216
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,64,512,4,0,0.6677066485087076
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,64,512,8,0,0.3487679958343506
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,64,512,16,0,0.19196800390879312
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,64,512,32,0,0.11315733194351196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,128,16,8,0,0.06227200229962667
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,64,512,64,0,0.07654933134714763
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,128,16,32,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,64,512,128,0,0.0499839981396993
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,128,16,1,0,0.3925493160883586
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,128,16,2,0,0.20358934005101523
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,128,16,4,0,0.11177600423494975
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,128,16,16,0,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,128,16,8,0,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,128,16,64,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,128,16,32,0,0.02975466599067052
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,128,16,128,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,128,16,1,0,0.44494398434956867
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,128,32,1,0,0.43674135208129883
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,128,32,2,0,0.22339733441670737
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,128,16,2,0,0.23402132590611777
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,128,16,4,0,0.12739200393358865
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,128,16,16,0,0.04369066655635834
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,128,16,64,0,0.02404800057411194
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,128,16,128,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,128,32,4,0,0.1213653286298116
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,128,32,8,0,0.06884799897670746
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,128,32,16,0,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,128,32,32,0,0.029152000943819683
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,128,32,64,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,128,32,128,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,128,32,1,0,0.5486186742782593
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,128,32,2,0,0.28547199567159015
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,128,32,4,0,0.1534986694653829
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,128,32,8,0,0.0865280032157898
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,128,32,16,0,0.04553066690762838
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,128,32,32,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,128,32,64,0,0.0268053337931633
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,128,64,16,0,0.05004799862702688
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,128,32,128,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,128,64,1,0,0.5393226544062296
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,128,64,2,0,0.2730933427810669
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,128,64,1,0,0.8071680068969727
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,128,64,4,0,0.14568000038464865
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,128,64,8,0,0.08099199831485748
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,128,64,32,0,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,128,64,64,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,128,64,128,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,128,64,64,0,0.028832000990708668
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,128,64,128,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,128,64,2,0,0.41475733121236164
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,128,64,4,0,0.21824532747268677
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,128,64,8,0,0.11963733037312825
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,128,128,8,0,0.11168533563613892
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,128,64,16,0,0.07222400108973186
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,128,64,32,0,0.04005333284536997
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,128,128,1,0,0.7525599797566732
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,128,128,2,0,0.3814026514689128
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,128,128,4,0,0.20125865936279297
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,128,128,16,0,0.06601066887378693
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,128,128,32,0,0.045653333266576133
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,128,128,64,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,128,128,128,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,128,128,1,0,1.2869760195414226
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,128,128,2,0,0.6576533317565918
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,128,128,4,0,0.3424373467763265
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,128,128,8,0,0.18292800585428873
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,128,128,16,0,0.10373333096504211
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,128,128,32,0,0.06746666630109151
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,128,128,64,0,0.03868266691764196
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,128,256,16,0,0.11743467052777608
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,128,128,128,0,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,128,256,64,0,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,128,256,1,0,1.4191786448160808
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,128,256,2,0,0.7182292938232422
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,128,256,4,0,0.3739466667175293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,128,256,8,0,0.20332266887029013
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,128,256,32,0,0.0782239983479182
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,128,256,128,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,128,256,1,0,2.375216007232666
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,128,256,2,0,1.2044533093770344
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,128,256,4,0,0.6168586810429891
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,128,256,8,0,0.32239999373753864
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,128,256,16,0,0.17764800786972046
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,128,256,32,0,0.10795733332633972
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,128,256,64,0,0.07146133482456207
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,128,256,128,0,0.048341333866119385
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,256,16,2,0,0.38758401075998944
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,256,16,1,0,0.763429323832194
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,256,16,4,0,0.20465066035588583
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,256,16,8,0,0.1128053367137909
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,256,16,16,0,0.06465599934260051
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,256,16,32,0,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,256,16,64,0,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,256,16,128,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,256,16,1,0,0.871562639872233
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,256,16,2,0,0.44655998547871906
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,256,16,4,0,0.234224001566569
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,256,16,8,0,0.12983999649683634
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,256,16,16,0,0.06810133159160614
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,256,16,32,0,0.04288533329963684
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,256,16,64,0,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,256,16,128,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,256,32,1,0,0.862010637919108
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,256,32,4,0,0.22585066159566244
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,256,32,2,0,0.436901330947876
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,256,32,1,0,1.0819679896036785
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,256,32,8,0,0.12166399757067363
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,256,32,4,0,0.286901334921519
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,256,32,16,0,0.0724480003118515
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,256,32,32,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,256,32,64,0,0.030879999200503033
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,256,32,128,0,0.022613334159056347
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,256,32,2,0,0.5496853192647299
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,256,64,1,0,1.071072022120158
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,256,32,8,0,0.15601066748301187
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,256,32,16,0,0.08912533521652222
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,256,32,32,0,0.04901333153247833
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,256,32,64,0,0.0349386657277743
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,256,32,128,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,256,64,128,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,256,64,2,0,0.5561759869257609
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,256,64,4,0,0.2796960075696309
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,256,64,8,0,0.15040533741315207
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,256,64,16,0,0.08650133013725281
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,256,64,32,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,256,64,16,0,0.12310399611790974
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,256,64,64,0,0.03664000084002813
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,256,64,1,0,1.6091094017028809
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,256,64,2,0,0.813701311747233
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,256,64,4,0,0.4212640126546224
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,256,64,8,0,0.2216320037841797
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,256,64,32,0,0.07550933460394542
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,256,64,64,0,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,256,64,128,0,0.0316746657093366
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,128,256,128,1,0,1.4997280438741047
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,64,256,128,2,0,0.7528800169626871
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,32,256,128,4,0,0.388373335202535
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,64,256,128,2,0,1.3036426703135173
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,16,256,128,8,0,0.20972800254821777
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,8,256,128,16,0,0.1213653286298116
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,4,256,128,32,0,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,2,256,128,64,0,0.05407466491063436
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,float16,1,256,128,128,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,128,256,128,1,0,2.575354735056559
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,32,256,128,4,0,0.6667679945627848
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,16,256,128,8,0,0.3503093322118123
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,8,256,128,16,0,0.18924800554911295
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,4,256,128,32,0,0.11108799775441487
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,2,256,128,64,0,0.07355199754238129
TRTLLM,1.2.0rc5,NVIDIA B200,mla_context,default,float16,fp8,1,256,128,128,0,0.04884799818197886
