framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,16,1,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,16,2,0,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,16,128,0,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,16,4,0,0.01569066693385442
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,16,8,0,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,16,2,0,0.018458666900793713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,16,16,0,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,16,4,0,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,16,64,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,16,32,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,16,8,0,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,16,16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,16,32,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,16,1,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,16,64,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,16,128,0,0.017909333109855652
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,32,1,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,32,2,0,0.015754666179418564
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,32,8,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,32,4,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,32,16,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,32,128,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,32,64,0,0.014394666999578476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,32,32,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,32,2,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,32,1,0,0.020165332903464634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,32,8,0,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,32,4,0,0.021957332889238994
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,32,16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,32,32,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,32,64,0,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,32,128,0,0.018191999445358913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,64,2,0,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,64,1,0,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,64,8,0,0.016330666840076447
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,64,4,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,64,16,0,0.01632533346613248
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,64,32,0,0.01618133361140887
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,64,64,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,64,128,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,64,2,0,0.021722666919231415
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,64,1,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,64,4,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,64,8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,64,16,0,0.01989866668979327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,64,32,0,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,64,128,0,0.020362666497627895
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,64,64,0,0.020074666788180668
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,128,1,0,0.021669333179791767
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,128,2,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,128,8,0,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,128,4,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,128,16,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,128,64,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,128,32,0,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,128,128,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,128,1,0,0.9609173138936361
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,128,2,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,128,8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,128,4,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,128,16,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,128,64,0,0.020469332734743755
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,128,32,0,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,128,128,0,0.019978666057189304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,256,1,0,0.03275199979543686
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,256,2,0,0.024058667321999867
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,256,4,0,0.02063999945918719
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,256,8,0,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,256,16,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,256,64,0,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,256,32,0,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,256,128,0,0.018042666216691334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,256,1,0,0.04612799982229868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,256,2,0,0.026421333352724712
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,256,8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,256,4,0,0.023962666591008503
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,256,16,0,0.02382933348417282
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,256,32,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,256,64,0,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,256,128,0,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,512,1,0,0.05912533402442932
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,512,2,0,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,512,8,0,0.02481066683928172
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,512,4,0,0.026346666117509205
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,512,16,0,0.023962666591008503
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,512,32,0,0.02293333411216736
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,512,64,0,0.022437334060668945
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,512,1,0,0.08366400003433228
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,512,128,0,0.022426667312781017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,512,2,0,0.05287466446558634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,512,8,0,0.02794133375088374
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,512,16,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,512,4,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,512,32,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,512,64,0,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,512,128,0,0.02658133457104365
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,1024,1,0,0.13481600085894266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,1024,2,0,0.07982400059700012
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,1024,4,0,0.05434666574001312
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,1024,32,0,0.03186666717131933
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,1024,8,0,0.03379199902216593
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,1024,16,0,0.032272001107533775
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,1024,128,0,0.030645333230495453
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,1024,64,0,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,1024,1,0,0.17178666591644287
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,1024,2,0,0.10102933645248413
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,1024,4,0,0.06486399968465169
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,1024,8,0,0.03754133234421412
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,1024,16,0,0.034586665530999504
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,1024,32,0,0.034448000291983284
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,1024,64,0,0.033770665526390076
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,1024,128,0,0.03297066688537598
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,1536,1,0,0.2406239906946818
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,1536,2,0,0.13593600193659464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,1536,4,0,0.08271466692288716
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,1536,8,0,0.056287998954455055
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,1536,32,0,0.040378667414188385
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,1536,16,0,0.04098133246103922
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,1536,64,0,0.038560000558694206
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,1536,128,0,0.03886933376391729
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,1536,1,0,0.28170667092005414
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,1536,2,0,0.15844266613324484
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,1536,4,0,0.09912533561388652
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,1536,8,0,0.06181333462397257
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,1536,32,0,0.04205333193143209
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,1536,64,0,0.04048000027736028
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,1536,16,0,0.042730664213498436
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,1536,128,0,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,2048,1,0,0.3731679916381836
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,2048,2,0,0.20684800545374551
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,2048,4,0,0.1225493351618449
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,2048,8,0,0.07868800063927968
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,2048,16,0,0.049679999550183616
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,2048,128,0,0.04740266501903534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,2048,32,0,0.048207998275756836
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,2048,64,0,0.04700266818205515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,2048,2,0,0.2305333415667216
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,2048,1,0,0.42044798533121747
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,2048,4,0,0.13339199622472128
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,2048,8,0,0.08555733164151509
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,2048,128,0,0.048063998421033226
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,2048,64,0,0.04818133513132731
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,2048,32,0,0.048751999934514366
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,2048,16,0,0.05189866820971171
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,3072,1,0,0.7411466439565023
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,3072,2,0,0.3927573362986247
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,3072,8,0,0.13321066896120706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,3072,4,0,0.22114666302998862
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,3072,16,0,0.08809066812197368
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,3072,64,0,0.06357333560784657
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,3072,128,0,0.06280000011126201
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,3072,32,0,0.06453866759936015
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,3072,2,0,0.40622401237487793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,3072,1,0,0.765168031056722
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,3072,8,0,0.13910399874051413
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,3072,4,0,0.23174399137496948
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,3072,32,0,0.06458666423956554
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,3072,16,0,0.08973866701126099
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,3072,64,0,0.062234664956728615
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,3072,128,0,0.06196266909440359
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,4096,2,0,0.6428426504135132
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,4096,1,0,1.2258133093516033
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,4096,8,0,0.20617600282033285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,4096,4,0,0.3620906670888265
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,4096,16,0,0.13402666648228964
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,4096,32,0,0.08155199885368347
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,4096,64,0,0.07930133243401845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,4096,128,0,0.07832533121109009
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,4096,1,0,1.2114613056182861
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,4096,2,0,0.6356213490168253
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,4096,8,0,0.19991467396418253
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,4096,4,0,0.34910933176676434
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,4096,32,0,0.0804906686147054
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,4096,16,0,0.13075733184814453
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,4096,128,0,0.07427733143170674
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,4096,64,0,0.07566933333873749
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,6144,1,0,2.821205457051595
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,6144,4,0,0.6944800217946371
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,6144,8,0,0.39050134023030597
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,6144,2,0,1.3192799886067708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,6144,64,0,0.11196266611417134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,6144,32,0,0.156058669090271
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,6144,16,0,0.23534933725992838
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,6144,128,0,0.11070400476455688
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,6144,8,0,0.3683626651763916
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,6144,4,0,0.6535199880599976
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,6144,1,0,2.4065759976704917
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,6144,2,0,1.2341439723968506
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,6144,16,0,0.22509332497914633
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,6144,128,0,0.10070400436719258
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,6144,32,0,0.14644799629847208
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,6144,64,0,0.10410666465759277
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,8192,8,0,0.6308639844258627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,8192,2,0,2.420805295308431
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,8192,4,0,1.178160031636556
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,8192,16,0,0.3667999903361003
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,8192,32,0,0.23820799589157104
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,8192,1,0,5.0188852945963545
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,8192,64,0,0.14894933501879373
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,8192,128,0,0.14327466487884521
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,8192,32,0,0.21913599967956543
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,8192,64,0,0.1388800044854482
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,8192,8,0,0.5833333333333334
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,8192,4,0,1.070799986521403
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,8192,2,0,2.028026739756266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,8192,16,0,0.33956265449523926
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,8192,1,0,4.222319920857747
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,8192,128,0,0.1290880044301351
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,10240,4,0,1.7826186815897624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,10240,8,0,0.9431680043538412
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,10240,16,0,0.5294186671574911
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,10240,64,0,0.21476266781489053
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,10240,32,0,0.3264213403065999
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,10240,2,0,3.8002452850341797
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,10240,128,0,0.1831573247909546
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,10240,1,0,7.601135889689128
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,10240,32,0,0.29893332719802856
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,10240,8,0,0.8418880303700765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,10240,64,0,0.1984106699625651
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,10240,2,0,3.189493179321289
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,10240,16,0,0.47551465034484863
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,10240,4,0,1.5779040654500325
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,10240,128,0,0.15595733126004538
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,10240,1,0,6.40997314453125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,12288,128,0,0.21081600586573282
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,12288,64,0,0.2789280017217
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,12288,32,0,0.4299253225326538
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,12288,16,0,0.7191413243611654
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,12288,8,0,1.3074133396148682
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,12288,4,0,2.4897333780924478
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,12288,2,0,5.477013270060222
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,12288,1,0,10.654336293538412
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,12288,16,0,0.6437280178070068
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,12288,32,0,0.3877333402633667
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,12288,1,0,9.048490524291992
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,12288,8,0,1.1536586284637451
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,12288,64,0,0.25190399090449017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,12288,128,0,0.19100799163182577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,12288,2,0,4.443589210510254
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,12288,4,0,2.186090628306071
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,1,16384,128,0,0.2794933319091797
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,1,16384,64,0,0.4378346602121989
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,1,16384,32,0,0.684117317199707
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,1,16384,16,0,1.1903573671976726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,1,16384,8,0,2.3757012685139975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,1,16384,4,0,4.557759920756022
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,1,16384,2,0,9.51414934794108
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,1,16384,1,0,18.935503641764324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,1,16384,16,0,1.043514649073283
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,1,16384,8,0,1.9353173573811848
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,1,16384,32,0,0.6113333304723104
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,1,16384,4,0,3.8892641067504883
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,1,16384,128,0,0.24810665845870972
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,1,16384,64,0,0.3949600060780843
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,1,16384,2,0,7.795637130737305
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,16,1,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,1,16384,1,0,15.225680033365885
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,16,4,0,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,16,2,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,16,32,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,16,8,0,0.01569066693385442
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,16,128,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,16,64,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,16,16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,16,2,0,0.020495999604463577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,16,4,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,16,1,0,0.024080000817775726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,16,8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,16,32,0,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,16,16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,16,64,0,0.018778666853904724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,16,128,0,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,32,2,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,32,4,0,0.01637866720557213
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,32,1,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,32,16,0,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,32,8,0,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,32,32,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,32,64,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,32,1,0,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,32,128,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,32,4,0,0.02189333240191142
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,32,2,0,0.022458667556444805
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,32,8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,32,16,0,0.020645332833131153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,32,128,0,0.020143999407688778
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,32,32,0,0.027701333165168762
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,32,64,0,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,64,1,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,64,4,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,64,2,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,64,8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,64,16,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,64,64,0,0.01617066686352094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,64,128,0,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,64,32,0,0.016048000504573185
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,64,2,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,64,1,0,0.02773866554101308
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,64,4,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,64,8,0,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,64,16,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,64,32,0,0.020655999581019085
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,64,128,0,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,64,64,0,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,128,1,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,128,2,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,128,4,0,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,128,8,0,0.018277333428462345
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,128,16,0,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,128,32,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,128,64,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,128,128,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,128,2,0,0.02465066562096278
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,128,1,0,0.04251199960708618
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,128,4,0,0.021946666141351063
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,128,16,0,0.021738665799299877
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,128,8,0,0.021717332303524017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,128,32,0,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,128,64,0,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,128,128,0,0.020314666132132213
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,256,1,0,0.04958933095137278
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,256,2,0,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,256,4,0,0.02236266682545344
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,256,8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,256,16,0,0.020410666863123577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,256,32,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,256,64,0,0.01834133391578992
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,256,1,0,0.07408533493677776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,256,128,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,256,2,0,0.04637333254019419
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,256,4,0,0.026629333694775898
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,256,8,0,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,256,16,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,256,32,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,256,64,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,512,1,0,0.09843200445175171
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,256,128,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,512,4,0,0.043706665436426796
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,512,2,0,0.05919999877611796
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,512,8,0,0.02681066592534383
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,512,16,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,512,32,0,0.024085332949956257
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,512,128,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,512,2,0,0.09507200121879578
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,512,64,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,512,4,0,0.053210665782292686
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,512,8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,512,1,0,0.14281066258748373
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,512,16,0,0.028309332827727
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,512,32,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,1024,1,0,0.23890133698781332
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,1024,4,0,0.07975466549396515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,512,64,0,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,1024,2,0,0.13395733634630838
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,1024,8,0,0.05356266597906748
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,1024,16,0,0.03606933355331421
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,512,128,0,0.02661866694688797
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,1024,32,0,0.03639466563860575
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,1024,128,0,0.03198933353026708
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,1024,64,0,0.03180266668399175
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,1024,2,0,0.17246399323145548
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,1024,1,0,0.3136960069338481
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,1024,32,0,0.03454400102297465
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,1024,8,0,0.06463466584682465
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,1024,4,0,0.09914132952690125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,1024,16,0,0.03851199895143509
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,1024,64,0,0.034517332911491394
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,1024,128,0,0.03412266572316488
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,1536,1,0,0.4450666507085164
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,1536,8,0,0.08430400490760803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,1536,2,0,0.2390399972597758
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,1536,32,0,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,1536,4,0,0.1351093351840973
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,1536,16,0,0.05810666580994924
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,1536,64,0,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,1536,128,0,0.039093332986036934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,1536,1,0,0.5283573468526205
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,1536,4,0,0.1593226691087087
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,1536,2,0,0.2821706732114156
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,1536,16,0,0.06409599880377452
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,1536,8,0,0.09832533200581868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,1536,32,0,0.04333333174387614
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,1536,64,0,0.04200533529122671
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,1536,128,0,0.040805332362651825
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,2048,8,0,0.12168000141779582
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,2048,4,0,0.20696532726287842
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,2048,1,0,0.7093599637349447
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,2048,2,0,0.3745013475418091
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,2048,32,0,0.05061866839726766
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,2048,16,0,0.08051733175913493
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,2048,64,0,0.05048533280690511
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,2048,1,0,0.7969653606414795
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,2048,128,0,0.04760533571243286
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,2048,8,0,0.1355893313884735
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,2048,16,0,0.08690133690834045
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,2048,32,0,0.052341332038243614
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,2048,4,0,0.23150400320688883
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,2048,64,0,0.0491893341143926
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,2048,2,0,0.4209226767222087
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,2048,128,0,0.04790933430194855
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,3072,1,0,1.431125322977702
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,3072,2,0,0.7417493661244711
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,3072,32,0,0.09328533212343852
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,3072,8,0,0.22234133879343668
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,3072,4,0,0.39625600973765057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,3072,64,0,0.07025066514809926
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,3072,16,0,0.1433013379573822
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,3072,128,0,0.0636053333679835
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,3072,2,0,0.7684533596038818
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,3072,1,0,1.479845364888509
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,3072,4,0,0.40510400136311847
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,3072,8,0,0.23061333100001016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,3072,16,0,0.14180800318717957
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,3072,32,0,0.09291199843088786
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,3072,64,0,0.06562133133411407
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,3072,128,0,0.06297599772612254
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,4096,2,0,1.2139893372853596
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,4096,1,0,2.402245362599691
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,4096,4,0,0.6487520138422648
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,4096,16,0,0.20803733666737875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,4096,32,0,0.137061337629954
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,4096,64,0,0.08487466971079509
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,4096,8,0,0.3515946865081787
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,4096,128,0,0.08130133152008057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,4096,1,0,2.3695732752482095
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,4096,4,0,0.6355253458023071
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,4096,2,0,1.21507732073466
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,4096,8,0,0.35650134086608887
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,4096,16,0,0.20700265963872275
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,4096,32,0,0.1328266660372416
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,4096,64,0,0.0844640036424001
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,4096,128,0,0.07800533374150594
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,6144,2,0,2.815018653869629
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,6144,16,0,0.39641066392262775
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,6144,64,0,0.15969600280125937
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,6144,32,0,0.23828266064325967
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,6144,8,0,0.6995893319447836
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,6144,4,0,1.3361066182454426
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,6144,128,0,0.11807466546694438
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,6144,1,0,5.5802561442057295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,6144,1,0,5.098704020182292
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,6144,16,0,0.37309332688649494
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,6144,32,0,0.22707200050354004
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,6144,64,0,0.15053866306940714
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,6144,8,0,0.6607893308003744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,6144,4,0,1.2363839944203694
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,6144,128,0,0.11205866932868958
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,6144,2,0,2.418085257212321
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,8192,1,0,9.88868268330892
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,8192,2,0,4.873525301615397
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,8192,128,0,0.15518933534622192
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,8192,16,0,0.6398559808731079
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,8192,32,0,0.372927983601888
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,8192,64,0,0.24210133155186972
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,8192,8,0,1.1755413214365642
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,8192,4,0,2.3228480021158853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,8192,2,0,4.110128084818522
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,8192,1,0,8.519994735717773
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,8192,64,0,0.22138667106628418
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,8192,8,0,1.0714773337046306
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,8192,16,0,0.5892693201700846
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,8192,128,0,0.14597866932551065
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,8192,32,0,0.34162668387095135
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,8192,4,0,2.0725439389546714
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,10240,2,0,7.703760147094727
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,10240,128,0,0.22205867369969687
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,10240,32,0,0.5424053271611532
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,10240,16,0,0.939621369043986
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,10240,64,0,0.3330826759338379
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,10240,8,0,1.8410773277282715
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,10240,1,0,15.118085225423178
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,10240,4,0,3.8420000076293945
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,10240,2,0,6.336138407389323
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,10240,1,0,12.390069325764975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,10240,64,0,0.30709866682688397
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,10240,32,0,0.48444799582163495
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,10240,16,0,0.8426187038421631
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,10240,128,0,0.20034132401148477
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,10240,8,0,1.581077257792155
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,10240,4,0,3.1511147816975913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,12288,2,0,10.824532826741537
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,12288,64,0,0.4368799924850464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,12288,128,0,0.2882399956385295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,12288,32,0,0.7354079882303873
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,12288,16,0,1.3294133345286052
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,12288,8,0,2.6822827657063804
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,12288,4,0,5.4311949412028
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,12288,4,0,4.48855463663737
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,12288,2,0,8.876458485921225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,12288,8,0,2.231525262196859
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,12288,1,0,21.68194580078125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,12288,32,0,0.6491146485010783
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,12288,64,0,0.3977706829706828
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,12288,16,0,1.160912036895752
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,12288,128,0,0.25939200321833294
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,12288,1,0,17.99835205078125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,2,16384,16,0,2.2194080352783203
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,2,16384,64,0,0.6983359654744467
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,2,16384,32,0,1.214565356572469
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,2,16384,128,0,0.4520266850789388
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,2,16384,8,0,4.82150936126709
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,2,16384,4,0,9.609893163045248
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,2,16384,8,0,3.92414919535319
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,2,16384,4,0,7.816325505574544
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,2,16384,16,0,1.9862720171610515
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,2,16384,32,0,1.0586133003234863
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,2,16384,64,0,0.6164906819661459
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,2,16384,2,0,18.899205525716145
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,2,16384,128,0,0.3964266777038574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,16,1,0,0.02700799951950709
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,2,16384,2,0,15.151514689127604
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,16,32,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,16,2,0,0.02067733307679494
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,16,4,0,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,16,8,0,0.016602666427691776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,16,16,0,0.016048000504573185
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,16,64,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,2,16384,1,0,30.559280395507812
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,2,16384,1,0,40.65454864501953
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,16,128,0,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,16,1,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,16,4,0,0.02223466585079829
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,16,2,0,0.02569599946339925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,16,8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,16,16,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,16,32,0,0.020666666328907013
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,16,64,0,0.020106667031844456
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,16,128,0,0.01993600030740102
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,32,1,0,0.028602667152881622
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,32,2,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,32,8,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,32,4,0,0.0176959993938605
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,32,16,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,32,32,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,32,64,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,32,128,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,32,1,0,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,32,2,0,0.02571733295917511
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,32,8,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,32,4,0,0.02213866760333379
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,32,16,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,32,64,0,0.020282667130231857
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,32,128,0,0.02038399999340375
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,32,32,0,0.020810666183630627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,64,1,0,0.033802665770053864
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,64,2,0,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,64,4,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,64,8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,64,16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,64,32,0,0.01634666696190834
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,64,64,0,0.016208000481128693
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,64,1,0,0.04808533191680908
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,64,128,0,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,64,2,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,64,4,0,0.02235200007756551
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,64,8,0,0.021946666141351063
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,64,16,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,128,2,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,64,64,0,0.020410666863123577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,64,128,0,0.02070933332045873
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,128,1,0,0.050437331199645996
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,64,32,0,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,128,4,0,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,128,8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,128,16,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,128,1,0,0.07052266597747803
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,128,32,0,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,128,128,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,128,2,0,0.042352000872294106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,128,64,0,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,128,8,0,0.022687998910744984
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,128,4,0,0.02571733295917511
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,128,16,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,128,32,0,0.021583999196688335
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,128,64,0,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,128,128,0,0.020618667205174763
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,256,2,0,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,256,1,0,0.08084799846013387
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,256,8,0,0.022800001005331676
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,256,4,0,0.03301866600910822
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,256,16,0,0.02186666677395503
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,256,32,0,0.02059200033545494
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,256,64,0,0.019978666057189304
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,256,128,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,256,2,0,0.07329066594441731
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,256,1,0,0.1279039978981018
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,256,4,0,0.04655466477076212
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,256,8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,256,16,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,256,32,0,0.024357333779335022
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,256,64,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,512,2,0,0.09851200381914775
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,256,128,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,512,8,0,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,512,1,0,0.17549866437911987
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,512,4,0,0.059706668059031166
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,512,16,0,0.02628266563018163
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,512,32,0,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,512,64,0,0.024330665667851765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,512,128,0,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,512,2,0,0.14306132992108664
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,512,4,0,0.08320533235867818
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,512,1,0,0.26001065969467163
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,512,8,0,0.05388266841570536
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,512,16,0,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,512,32,0,0.02886933336655299
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,512,64,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,512,128,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,1024,4,0,0.1389173368612925
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,1024,1,0,0.44998399416605633
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,1024,8,0,0.08012266457080841
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,1024,2,0,0.24151466290156046
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,1024,32,0,0.034645333886146545
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,1024,16,0,0.055306668082873024
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,1024,128,0,0.031685332457224526
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,1024,64,0,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,1024,4,0,0.171999990940094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,1024,8,0,0.10458667079607646
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,1024,1,0,0.5885813236236572
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,1024,16,0,0.06602666775385539
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,1024,2,0,0.3115359942118327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,1024,32,0,0.0395359992980957
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,1024,128,0,0.034703999757766724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,1024,64,0,0.03719466676314672
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,1536,4,0,0.24077866474787393
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,1536,2,0,0.4557439883550008
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,1536,16,0,0.08542399605115254
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,1536,1,0,0.8666826883951823
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,1536,8,0,0.1383573313554128
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,1536,32,0,0.059792002042134605
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,1536,64,0,0.042437334855397545
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,1536,128,0,0.04134399940570196
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,1536,1,0,1.018287976582845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,1536,32,0,0.06598933537801106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,1536,64,0,0.045754666129748024
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,1536,8,0,0.160671999057134
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,1536,16,0,0.09973866740862529
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,1536,2,0,0.5298346678415934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,1536,4,0,0.28465066353480023
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,1536,128,0,0.042261332273483276
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,2048,1,0,1.4351253509521484
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,2048,2,0,0.7123306592305502
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,2048,64,0,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,2048,32,0,0.08755200107892354
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,2048,4,0,0.382479985555013
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,2048,8,0,0.21039466063181558
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,2048,128,0,0.050250664353370667
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,2048,16,0,0.12635200222333273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,2048,1,0,1.5538132985432942
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,2048,2,0,0.801034688949585
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,2048,16,0,0.13642666737238565
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,2048,8,0,0.23522667090098062
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,2048,4,0,0.4215253194173177
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,2048,128,0,0.05121066669623057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,2048,32,0,0.08998933434486389
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,2048,64,0,0.05871466795603434
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,3072,1,0,3.1077067057291665
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,3072,2,0,1.4365545908610027
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,3072,16,0,0.22978132963180542
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,3072,4,0,0.7524212996164957
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,3072,8,0,0.3945653438568115
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,3072,32,0,0.14166933298110962
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,3072,64,0,0.09572266538937886
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,3072,128,0,0.07270933190981548
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,3072,1,0,3.0176000595092773
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,3072,2,0,1.4888960520426433
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,3072,4,0,0.7716853618621826
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,3072,8,0,0.4100053310394287
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,3072,32,0,0.14539200067520142
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,3072,128,0,0.07288533449172974
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,3072,16,0,0.233952005704244
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,3072,64,0,0.09744532903035481
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,4096,2,0,2.415797392527262
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,4096,4,0,1.2329706350962322
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,4096,1,0,5.409365336100261
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,4096,8,0,0.6530293226242065
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,4096,128,0,0.09064533313115437
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,4096,64,0,0.1413226624329885
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,4096,16,0,0.35674134890238446
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,4096,32,0,0.21408534049987793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,4096,4,0,1.2194666862487793
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,4096,1,0,4.923402786254883
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,4096,2,0,2.4189279874165854
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,4096,64,0,0.13742933670679727
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,4096,8,0,0.6425600051879883
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,4096,32,0,0.20791999499003092
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,4096,128,0,0.09019200007120769
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,4096,16,0,0.3540159861246745
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,6144,4,0,2.6865228017171225
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,6144,2,0,5.7739518483479815
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,6144,16,0,0.7250400384267172
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,6144,1,0,11.548245747884115
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,6144,128,0,0.1673333247502645
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,6144,64,0,0.24754667282104492
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,6144,32,0,0.39906132221221924
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,6144,8,0,1.3339145978291829
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,6144,128,0,0.15688000122706094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,6144,64,0,0.23122133811314902
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,6144,32,0,0.3781973520914714
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,6144,8,0,1.2485919793446858
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,6144,16,0,0.6659520069758097
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,6144,4,0,2.416938622792562
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,6144,1,0,10.032623926798502
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,6144,2,0,5.077781359354655
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,4,8192,32,0,0.650325338045756
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,4,8192,64,0,0.3856053352355957
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,4,8192,16,0,1.2018720308939617
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,4,8192,8,0,2.43668270111084
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,4,8192,128,0,0.2526666720708211
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,4,8192,4,0,5.032320022583008
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,4,8192,2,0,9.94652239481608
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,4,8192,1,0,19.571428934733074
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,4,8192,8,0,2.069178740183512
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,4,8192,32,0,0.5996960004170736
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,4,8192,16,0,1.0850880146026611
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,4,8192,64,0,0.3575199842453003
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,4,8192,128,0,0.23381332556406656
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,4,8192,4,0,4.314544041951497
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,4,8192,2,0,8.529162724812826
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,16,1,0,0.041562666495641075
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,4,8192,1,0,16.937781016031902
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,16,4,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,16,2,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,16,16,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,16,8,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,16,32,0,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,16,128,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,16,64,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,16,1,0,0.04568533102671305
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,16,4,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,16,8,0,0.02202133337656657
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,16,2,0,0.03216533362865448
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,16,64,0,0.020506666352351505
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,16,16,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,16,32,0,0.020682666450738907
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,16,128,0,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,32,1,0,0.04514666895071665
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,32,16,0,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,32,8,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,32,2,0,0.028373333315054577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,32,32,0,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,32,4,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,32,64,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,32,128,0,0.01607999950647354
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,32,1,0,0.058058664202690125
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,32,2,0,0.03282133241494497
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,32,8,0,0.02242133269707362
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,32,64,0,0.02060266708334287
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,32,4,0,0.025621332228183746
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,32,16,0,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,32,128,0,0.020560000091791153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,32,32,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,64,2,0,0.03491200009981791
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,64,1,0,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,64,64,0,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,64,4,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,64,8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,64,32,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,64,16,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,64,128,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,64,2,0,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,64,1,0,0.07982400059700012
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,64,16,0,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,64,4,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,128,2,0,0.04919999837875366
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,64,8,0,0.022805333137512207
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,64,32,0,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,64,64,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,128,1,0,0.08493333061536153
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,64,128,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,128,16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,128,4,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,128,8,0,0.022117334107557934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,128,1,0,0.12532266974449158
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,128,128,0,0.017946666727463405
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,128,32,0,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,128,64,0,0.01783466711640358
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,128,2,0,0.07011199990908305
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,128,4,0,0.04307733476161957
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,128,8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,128,16,0,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,128,64,0,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,128,128,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,128,32,0,0.02178666740655899
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,256,1,0,0.14551466703414917
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,256,4,0,0.05060799916585287
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,256,2,0,0.08182399968306224
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,256,8,0,0.03401600072781245
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,256,16,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,256,32,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,256,64,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,256,128,0,0.01951466624935468
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,256,1,0,0.2311306595802307
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,256,2,0,0.12777066230773926
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,256,4,0,0.07425599793593089
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,256,8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,256,16,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,256,64,0,0.02380266785621643
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,256,32,0,0.024853333830833435
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,256,128,0,0.023936000963052113
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,512,1,0,0.3261546691258748
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,512,2,0,0.17740267515182495
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,512,4,0,0.09885866443316142
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,512,8,0,0.061050668358802795
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,512,16,0,0.042352000872294106
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,512,32,0,0.026890667776266735
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,512,64,0,0.025749333202838898
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,512,128,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,512,1,0,0.485424002011617
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,512,2,0,0.2581226627031962
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,512,4,0,0.1434453328450521
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,512,8,0,0.08442133665084839
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,512,16,0,0.0543093333641688
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,512,32,0,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,512,128,0,0.028042666614055634
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,512,64,0,0.028650666276613872
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,1024,1,0,0.8867626984914144
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,1024,8,0,0.13911466797192892
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,1024,16,0,0.08348799745241801
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,1024,4,0,0.2455093264579773
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,1024,2,0,0.4618080059687297
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,1024,32,0,0.061119998494784035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,1024,64,0,0.03808533400297165
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,1024,128,0,0.03435733417669932
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,1024,1,0,1.1514453093210857
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,1024,2,0,0.5945599873860677
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,1024,4,0,0.3168960014979045
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,1024,16,0,0.10268266995747884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,1024,8,0,0.17430933316548666
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,1024,64,0,0.04436799883842468
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,1024,32,0,0.06861866513888042
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,1024,128,0,0.03719999889532725
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,1536,1,0,1.770271937052409
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,1536,4,0,0.45104531447092694
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,1536,2,0,0.8583573500315348
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,1536,8,0,0.24693334102630615
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,1536,16,0,0.14308800299962363
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,1536,128,0,0.047685335079828896
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,1536,32,0,0.09014399846394856
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,1536,64,0,0.06440000236034393
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,1536,4,0,0.5365013281504313
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,1536,16,0,0.1634773313999176
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,1536,8,0,0.28571200370788574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,1536,1,0,2.021626631418864
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,1536,2,0,1.0308480262756348
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,1536,64,0,0.07067733506361644
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,1536,32,0,0.10310399532318115
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,1536,128,0,0.05359466870625814
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,2048,32,0,0.13237333297729492
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,2048,16,0,0.2171893318494161
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,2048,1,0,2.8734718958536782
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,2048,2,0,1.407375971476237
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,2048,8,0,0.3863733212153117
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,2048,128,0,0.06051200131575266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,2048,4,0,0.7244266668955485
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,2048,64,0,0.08859733740488689
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,2048,1,0,3.0848747889200845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,2048,16,0,0.2367039918899536
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,2048,128,0,0.06365333497524261
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,2048,32,0,0.14033599694569907
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,2048,64,0,0.09396800398826599
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,2048,8,0,0.4253386656443278
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,2048,2,0,1.5739307403564453
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,2048,4,0,0.8109760284423828
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,3072,1,0,6.3378346761067705
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,3072,2,0,3.1254666646321616
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,3072,128,0,0.1037546694278717
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,3072,8,0,0.7514239947001139
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,3072,32,0,0.2367146611213684
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,3072,64,0,0.15030399958292642
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,3072,16,0,0.40803734461466473
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,3072,4,0,1.4305973052978516
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,3072,1,0,6.1433976491292315
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,3072,2,0,2.9740587870279946
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,3072,16,0,0.41705600420633954
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,3072,32,0,0.2434933384259542
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,3072,8,0,0.7803626855214437
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,3072,4,0,1.5159573554992676
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,3072,128,0,0.10502400000890096
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,3072,64,0,0.15577066938082376
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,8,4096,1,0,10.860965728759766
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,8,4096,8,0,1.2668853600819905
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,8,4096,2,0,5.3564802805582685
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,8,4096,64,0,0.223578671614329
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,8,4096,32,0,0.36722131570180255
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,8,4096,128,0,0.15241600076357523
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,8,4096,16,0,0.657968004544576
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,8,4096,4,0,2.470954736073812
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,8,4096,4,0,2.438437302907308
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,8,4096,1,0,10.055728276570639
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,8,4096,8,0,1.2421013514200847
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,8,4096,64,0,0.21651732921600342
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,8,4096,32,0,0.3640586535135905
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,8,4096,16,0,0.6572106679280599
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,8,4096,128,0,0.1493760049343109
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,16,16,1,0,0.06982933481534322
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,8,4096,2,0,4.920378684997559
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,16,16,2,0,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,16,16,4,0,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,16,16,16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,16,16,8,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,16,16,64,0,0.015754666179418564
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,16,16,32,0,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,16,16,1,0,0.08203733464082082
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,16,16,128,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,16,16,2,0,0.04538666705290476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,16,16,4,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,16,16,8,0,0.025621332228183746
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,16,16,64,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,16,16,32,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,16,16,16,0,0.022319999833901722
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,16,32,1,0,0.07288533449172974
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,16,16,128,0,0.02032533288002014
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,16,32,4,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,16,32,2,0,0.04466133316357931
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,16,32,32,0,0.01617066686352094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,16,32,64,0,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,16,32,8,0,0.02187199890613556
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,16,32,128,0,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,16,32,16,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,16,32,1,0,0.10170132915178935
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,16,32,2,0,0.058464000622431435
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,16,32,4,0,0.03299200038115183
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,16,32,64,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,16,32,8,0,0.03206400076548258
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,16,32,16,0,0.02242133269707362
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,16,32,128,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,16,32,32,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,16,64,2,0,0.054378668467203774
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,16,64,1,0,0.08989333113034566
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,16,64,4,0,0.03498133271932602
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,16,64,8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,16,64,16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,16,64,32,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,16,64,64,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,16,64,128,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,16,64,2,0,0.07955733438332875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,16,64,1,0,0.14356799920399985
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,16,64,16,0,0.022698665658632915
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,16,64,8,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,16,64,64,0,0.022053333620230358
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,16,64,4,0,0.04861866434415182
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,16,128,1,0,0.14851199587186178
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,16,128,2,0,0.08381332953770955
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,16,64,32,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,16,64,128,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,16,128,4,0,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,16,128,16,0,0.02250666668017705
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,16,128,8,0,0.03230933348337809
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,16,128,32,0,0.019744000087181728
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,16,128,64,0,0.05117333432038625
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,16,128,128,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,16,128,1,0,0.219760000705719
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,16,128,2,0,0.12312533458073933
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,16,128,4,0,0.07083733379840851
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,16,128,8,0,0.044490665197372437
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,16,128,64,0,0.022389332453409832
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,16,128,32,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,16,128,128,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,16,128,16,0,0.025936000049114227
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,16,256,2,0,0.14669332901636759
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,16,256,1,0,0.27031999826431274
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,16,256,4,0,0.08383466800053914
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,16,256,8,0,0.052282666166623436
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,16,256,16,0,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,16,256,64,0,0.022266666094462078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,16,256,32,0,0.024090667565663654
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,16,256,128,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,16,256,2,0,0.2355146606763204
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,16,256,1,0,0.4371519883473714
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,16,256,4,0,0.12827199697494507
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,16,256,8,0,0.0758133331934611
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,16,256,16,0,0.04897066454092661
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,16,256,64,0,0.02586666742960612
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,16,256,32,0,0.03068800022204717
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,16,256,128,0,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,16,512,1,0,0.6491359869639078
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,16,512,4,0,0.18035733699798584
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,16,512,2,0,0.3305013378461202
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,16,512,8,0,0.10735467076301575
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,16,512,16,0,0.06580266853173573
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,16,512,64,0,0.030282666285832722
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,16,512,128,0,0.029648000995318096
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,16,512,4,0,0.26097599665323895
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,16,512,32,0,0.04553066690762838
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,16,512,2,0,0.48863999048868817
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,16,512,8,0,0.14552000164985657
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,16,512,1,0,0.9528373082478842
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,16,512,32,0,0.05759466687838236
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,16,512,16,0,0.08656000097592671
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,16,512,64,0,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,16,512,128,0,0.03107200066248576
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,16,1024,2,0,0.8977493445078532
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,16,1024,1,0,1.754410743713379
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,16,1024,4,0,0.4668426513671875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,16,1024,8,0,0.25366934140523273
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,16,1024,16,0,0.14386133352915445
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,16,1024,32,0,0.09033067027727763
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,16,1024,64,0,0.06284266710281372
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,16,1024,128,0,0.04515199859937032
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,16,1024,4,0,0.598853349685669
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,16,1024,8,0,0.3195573290189107
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,16,1024,16,0,0.17892799774805704
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,16,1024,64,0,0.07292266686757405
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,16,1024,32,0,0.10648000240325928
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,16,1024,2,0,1.1640586853027344
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,16,1024,1,0,2.286351998647054
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,16,1024,128,0,0.05053333441416422
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,16,1536,4,0,0.8733173211415609
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,16,1536,8,0,0.4570506811141968
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,16,1536,32,0,0.15392000476519266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,16,1536,16,0,0.2568533420562744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,16,1536,64,0,0.09845333298047383
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,16,1536,2,0,1.6903039614359539
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,16,1536,1,0,3.6355625788370767
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,16,1536,128,0,0.07460799813270569
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,16,1536,16,0,0.30018667380015057
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,16,1536,32,0,0.1774666706720988
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,16,1536,4,0,1.04639999071757
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,16,1536,8,0,0.5469919840494791
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,16,1536,2,0,2.0439573923746743
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,16,1536,64,0,0.10913599530855815
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,16,1536,128,0,0.07771733403205872
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,16,1536,1,0,4.065290768941243
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,16,2048,1,0,5.9282881418863935
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,16,2048,2,0,2.9141759872436523
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,16,2048,64,0,0.14316800236701965
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,16,2048,32,0,0.22593599557876587
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,16,2048,16,0,0.39377065499623615
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,16,2048,4,0,1.4063199361165364
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,16,2048,128,0,0.10251733660697937
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,16,2048,8,0,0.7328960100809733
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,16,2048,2,0,3.213242530822754
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,16,2048,1,0,6.374949137369792
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,16,2048,16,0,0.4378559986750285
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,16,2048,8,0,0.8221386273701986
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,16,2048,4,0,1.5998506546020508
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,16,2048,32,0,0.24665600061416626
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,16,2048,128,0,0.10356799761454265
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,16,2048,64,0,0.1497066617012024
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,32,16,1,0,0.1267626682917277
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,32,16,2,0,0.06860266625881195
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,32,16,4,0,0.049141332507133484
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,32,16,8,0,0.027893332143624622
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,32,16,64,0,0.016384000579516094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,32,16,16,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,32,16,32,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,32,16,128,0,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,32,16,1,0,0.14331733187039694
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,32,16,2,0,0.08036800225575765
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,32,16,4,0,0.04572799801826477
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,32,16,8,0,0.031162666777769726
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,32,16,16,0,0.036277333895365395
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,32,16,32,0,0.022677332162857056
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,32,16,64,0,0.022042666872342426
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,32,32,2,0,0.07301866511503856
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,32,16,128,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,32,32,1,0,0.13062933087348938
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,32,32,8,0,0.027669332921504974
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,32,32,4,0,0.0455626646677653
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,32,32,32,0,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,32,32,16,0,0.020506666352351505
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,32,32,64,0,0.01651200031240781
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,32,32,128,0,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,32,32,1,0,0.18533867597579956
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,32,32,2,0,0.09831466277440389
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,32,32,4,0,0.05804799993832906
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,32,32,8,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,32,32,16,0,0.025983999172846477
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,32,32,32,0,0.022143999735514324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,32,32,128,0,0.02197866638501485
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,32,32,64,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,32,64,1,0,0.16341333587964377
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,32,64,2,0,0.09039466579755147
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,32,64,4,0,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,32,64,8,0,0.03623999903599421
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,32,64,16,0,0.024400000770886738
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,32,64,32,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,32,64,64,0,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,32,64,1,0,0.2641333341598511
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,32,64,2,0,0.14447999993960062
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,32,64,128,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,32,64,4,0,0.07996800045172374
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,32,64,8,0,0.049626668294270836
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,32,64,16,0,0.030320001145203907
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,32,64,32,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,32,64,64,0,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,32,64,128,0,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,32,128,4,0,0.08877333005269368
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,32,128,1,0,0.28034667174021405
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,32,128,2,0,0.14873600006103516
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,32,128,8,0,0.05153066913286845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,32,128,16,0,0.03493333359559377
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,32,128,64,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,32,128,32,0,0.023754666248957317
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,32,128,128,0,0.020794666061798733
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,32,128,2,0,0.2218773365020752
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,32,128,1,0,0.41527998447418213
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,32,128,8,0,0.07256000240643819
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,32,128,4,0,0.12369599938392639
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,32,128,16,0,0.04566933214664459
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,32,128,32,0,0.02980799973011017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,32,128,128,0,0.022848000129063923
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,32,128,64,0,0.02385599911212921
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,32,256,4,0,0.14919466773668924
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,32,256,1,0,0.5112266540527344
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,32,256,2,0,0.27089067300160724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,32,256,16,0,0.05657599866390228
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,32,256,8,0,0.08809600273768108
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,32,256,32,0,0.03791466603676478
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,32,256,64,0,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,32,256,128,0,0.023733332753181458
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,32,256,4,0,0.23372799158096313
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,32,256,2,0,0.4387146631876628
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,32,256,8,0,0.132560004790624
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,32,256,1,0,0.85261336962382
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,32,256,16,0,0.0780213326215744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,32,256,32,0,0.050944000482559204
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,32,256,64,0,0.03393599887688955
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,32,256,128,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,32,512,32,0,0.07140799860159557
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,32,512,2,0,0.6411306858062744
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,32,512,4,0,0.33579734961191815
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,32,512,8,0,0.19050665696461996
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,32,512,1,0,1.2583786646525066
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,32,512,16,0,0.1162506639957428
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,32,512,128,0,0.03798400113979975
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,32,512,64,0,0.05150400102138519
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,32,512,2,0,0.9617866675059
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,32,512,1,0,1.8926080067952473
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,32,512,64,0,0.06262399752934773
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,32,512,32,0,0.09114133318265279
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,32,512,8,0,0.2671893239021301
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,32,512,128,0,0.04403733213742574
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,32,512,16,0,0.1518346667289734
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,32,512,4,0,0.4962453444798787
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,32,1024,1,0,3.768853187561035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,32,1024,4,0,0.9126026630401611
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,32,1024,16,0,0.26154132684071857
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,32,1024,2,0,1.7522346178690593
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,32,1024,128,0,0.0748586654663086
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,32,1024,8,0,0.48106133937835693
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,32,1024,64,0,0.10095999638239543
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,32,1024,32,0,0.15622400244077048
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,32,1024,1,0,4.675109227498372
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,32,1024,2,0,2.3481653531392417
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,32,1024,4,0,1.1964800357818604
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,32,1024,64,0,0.1156213382879893
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,32,1024,16,0,0.3303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,32,1024,32,0,0.18947199980417886
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,32,1024,128,0,0.08227733274300893
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,32,1024,8,0,0.6175893147786459
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,64,16,1,0,0.24266666173934937
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,64,16,2,0,0.1242026686668396
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,64,16,4,0,0.07099199791749318
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,64,16,16,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,64,16,8,0,0.0800853321949641
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,64,16,32,0,0.020021333048741024
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,64,16,2,0,0.14198933045069376
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,64,16,64,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,64,16,1,0,0.2696479956309001
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,64,16,128,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,64,16,4,0,0.08166400094827016
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,64,16,8,0,0.04529066880544027
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,64,16,32,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,64,16,16,0,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,64,16,128,0,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,64,16,64,0,0.022474666436513264
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,64,32,4,0,0.07456533114115398
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,64,32,1,0,0.25091199080149335
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,64,32,2,0,0.13081066807111105
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,64,32,8,0,0.04531733194986979
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,64,32,16,0,0.028880000114440918
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,64,32,64,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,64,32,1,0,0.34331734975179035
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,64,32,32,0,0.02060266708334287
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,64,32,8,0,0.05929600199063619
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,64,32,128,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,64,32,2,0,0.18337599436442056
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,64,32,4,0,0.09876267115275066
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,64,32,16,0,0.035114665826161705
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,64,32,32,0,0.02622933437426885
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,64,32,64,0,0.022815999885400135
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,64,32,128,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,64,64,2,0,0.1646346648534139
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,64,64,8,0,0.05518933137257894
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,64,64,1,0,0.3073493242263794
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,64,64,16,0,0.0367999995748202
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,64,64,4,0,0.09285333752632141
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,64,64,32,0,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,64,64,64,0,0.020581333587567013
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,64,64,128,0,0.018496000518401463
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,64,64,16,0,0.05090666810671488
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,64,64,1,0,0.5012426773707072
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,64,64,8,0,0.08067200084527333
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,64,64,4,0,0.14306666453679404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,64,64,2,0,0.264080007870992
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,64,64,32,0,0.031114667654037476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,64,64,64,0,0.024442667762438457
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,64,64,128,0,0.022282667458057404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,64,128,1,0,0.5325013399124146
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,64,128,32,0,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,64,128,16,0,0.05453333258628845
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,64,128,4,0,0.15371732910474142
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,64,128,2,0,0.27958933512369794
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,64,128,8,0,0.08915733297665913
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,64,128,64,0,0.02812266598145167
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,64,128,128,0,0.022128000855445862
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,64,128,1,0,0.8135626316070557
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,64,128,8,0,0.12589866916338602
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,64,128,2,0,0.4201013247172038
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,64,128,4,0,0.22298133373260498
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,64,128,16,0,0.075162669022878
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,64,128,32,0,0.04780800143877665
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,64,128,128,0,0.026741333305835724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,64,128,64,0,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,64,256,4,0,0.2746933301289876
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,64,256,1,0,1.0112426280975342
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,64,256,2,0,0.5152959823608398
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,64,256,16,0,0.09236799677213033
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,64,256,32,0,0.06235733131567637
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,64,256,8,0,0.15661866466204324
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,64,256,64,0,0.045114666223526
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,64,256,128,0,0.035258665680885315
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,64,256,32,0,0.08273600041866302
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,64,256,8,0,0.23881600300470987
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,64,256,16,0,0.13703999916712442
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,64,256,4,0,0.4441759983698527
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,64,256,2,0,0.8626879851023356
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,64,256,64,0,0.056090667843818665
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,64,256,128,0,0.040906667709350586
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,64,256,1,0,1.6962134043375652
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,64,512,1,0,2.483722686767578
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,64,512,32,0,0.12089066704114278
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,64,512,8,0,0.3466026782989502
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,64,512,4,0,0.6529706716537476
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,64,512,16,0,0.20241065820058188
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,64,512,64,0,0.08201600114504497
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,64,512,2,0,1.2678453127543132
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,64,512,128,0,0.06512000163396199
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,64,512,2,0,1.9414347012837727
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,64,512,1,0,3.8638718922932944
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,64,512,16,0,0.2765546639760335
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,64,512,8,0,0.5136213302612305
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,64,512,64,0,0.11633066336313884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,64,512,128,0,0.07409599920113881
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,64,512,32,0,0.1591093341509501
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,64,512,4,0,0.9871893723805746
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,128,16,1,0,0.49012800057729083
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,128,16,2,0,0.24813334147135416
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,128,16,4,0,0.14316266775131226
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,128,16,8,0,0.0703306645154953
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,128,16,32,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,128,16,64,0,0.020330666253964107
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,128,16,1,0,0.5155146519343058
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,128,16,16,0,0.04055466751257578
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,128,16,128,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,128,16,2,0,0.27340267101923627
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,128,16,4,0,0.14171199997266135
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,128,16,8,0,0.08136000235875447
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,128,16,16,0,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,128,16,32,0,0.0322026660044988
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,128,16,128,0,0.022533332308133442
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,128,16,64,0,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,128,32,4,0,0.13593066732088724
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,128,32,2,0,0.2488480011622111
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,128,32,1,0,0.5178506771723429
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,128,32,8,0,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,128,32,16,0,0.04632533093293508
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,128,32,32,0,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,128,32,128,0,0.018661333868900936
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,128,32,64,0,0.022341333329677582
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,128,32,4,0,0.18408532937367758
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,128,32,1,0,0.6641759872436523
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,128,32,2,0,0.34402668476104736
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,128,32,16,0,0.06141333281993866
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,128,32,8,0,0.10064533352851868
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,128,32,64,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,128,32,32,0,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,128,32,128,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,128,64,4,0,0.16927466789881387
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,128,64,1,0,0.5916106700897217
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,128,64,2,0,0.30921600262324017
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,128,64,8,0,0.09612799684206645
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,128,64,16,0,0.058837334314982094
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,128,64,32,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,128,64,64,0,0.028143999477227528
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,128,64,2,0,0.5123306512832642
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,128,64,128,0,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,128,64,4,0,0.2669386665026347
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,128,64,16,0,0.08317333459854126
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,128,64,8,0,0.14697600404421488
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,128,64,32,0,0.05303466816743215
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,128,64,1,0,0.9859039783477783
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,128,64,64,0,0.03525333354870478
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,128,64,128,0,0.025754667818546295
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,128,128,8,0,0.15920533736546835
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,128,128,1,0,1.063482681910197
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,128,128,16,0,0.09521067142486572
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,128,128,32,0,0.0614026685555776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,128,128,4,0,0.2872533400853475
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,128,128,2,0,0.5382399956385294
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,128,128,64,0,0.04529599845409393
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,128,128,128,0,0.03453333427508672
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,128,128,4,0,0.4254293441772461
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,128,128,8,0,0.2290133237838745
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,128,128,64,0,0.0524586687485377
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,128,128,16,0,0.1397760013739268
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,128,128,2,0,0.8232800165812174
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,128,128,32,0,0.08009600142637889
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,128,128,1,0,1.6194720268249512
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,128,128,128,0,0.040181333820025124
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,128,256,16,0,0.16521599888801575
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,128,256,8,0,0.28875732421875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,128,256,32,0,0.10532800356547038
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,128,256,4,0,0.5277386506398519
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,128,256,64,0,0.07415466507275899
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,128,256,2,0,1.020085334777832
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,128,256,1,0,2.003333409627279
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,128,256,128,0,0.05657066901524862
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,128,256,1,0,3.4744478861490884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,128,256,4,0,0.8874773184458414
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,128,256,16,0,0.2511253356933594
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,128,256,8,0,0.46086398760477704
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,128,256,64,0,0.09592533111572266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,128,256,32,0,0.1469386617342631
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,128,256,128,0,0.06534400085608165
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,128,256,2,0,1.748538653055827
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,256,16,1,0,0.9986773331960043
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,256,16,2,0,0.4968159993489583
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,256,16,4,0,0.23663999636967978
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,256,16,8,0,0.1267253359158834
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,256,16,32,0,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,256,16,64,0,0.02787200113137563
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,256,16,16,0,0.0693333347638448
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,256,16,128,0,0.032629333436489105
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,256,16,2,0,0.5194826523462931
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,256,16,1,0,1.0094666481018066
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,256,16,4,0,0.2709653377532959
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,256,16,8,0,0.14410133163134256
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,256,16,16,0,0.08335466186205547
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,256,16,64,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,256,16,32,0,0.04869333406289419
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,256,16,128,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,256,32,2,0,0.523365338643392
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,256,32,8,0,0.13331733147303262
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,256,32,4,0,0.25647467374801636
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,256,32,1,0,1.028645356496175
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,256,32,16,0,0.07701866825421651
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,256,32,32,0,0.04910933474699656
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,256,32,128,0,0.024314666787783306
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,256,32,4,0,0.34966933727264404
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,256,32,64,0,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,256,32,8,0,0.1876586675643921
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,256,32,2,0,0.6668586730957031
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,256,32,1,0,1.3083786964416504
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,256,32,16,0,0.10365866621335347
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,256,32,32,0,0.06369600196679433
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,256,32,64,0,0.042277331153551735
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,256,32,128,0,0.028853334486484528
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,256,64,2,0,0.6105440060297648
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,256,64,4,0,0.3165546655654907
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,256,64,8,0,0.17842666308085123
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,256,64,1,0,1.1799573103586833
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,256,64,16,0,0.10213333368301392
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,256,64,32,0,0.06583466629187266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,256,64,64,0,0.04783466458320618
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,256,64,128,0,0.03612266729275385
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,256,64,2,0,0.9974613189697266
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,256,64,1,0,1.969194730122884
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,256,64,4,0,0.5125493208567301
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,256,64,8,0,0.2722613414128621
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,256,64,128,0,0.04241600135962168
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,256,64,32,0,0.08854933579762776
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,256,64,16,0,0.15134933590888977
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,256,64,64,0,0.058890665570894875
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,64,256,128,2,0,1.044416030248006
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,32,256,128,4,0,0.5543680191040039
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,128,256,128,1,0,2.062117258707682
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,16,256,128,8,0,0.29601067304611206
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,8,256,128,16,0,0.170415997505188
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,2,256,128,64,0,0.07393066585063934
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,4,256,128,32,0,0.10797333717346191
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,float16,1,256,128,128,0,0.05793066819508871
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,16,256,128,8,0,0.4431626796722412
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,32,256,128,4,0,0.8643573125203451
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,64,256,128,2,0,1.6762720743815105
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,4,256,128,32,0,0.1397279997666677
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,128,256,128,1,0,3.315312067667643
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,2,256,128,64,0,0.0886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,1,256,128,128,0,0.0620959997177124
TRTLLM,1.2.0rc5,NVIDIA H200,mla_context,default,float16,fp8,8,256,128,16,0,0.24119999011357626
