framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,16,1,0,0.014639999717473984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,16,2,0,0.0129120002190272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,16,64,0,0.01184533288081487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,16,8,0,0.013610667238632837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,16,4,0,0.013786666095256805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,16,2,0,0.012885333349307379
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,16,16,0,0.01292266696691513
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,16,4,0,0.012608000387748083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,16,32,0,0.01198400060335795
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,16,1,0,0.014848000059525171
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,16,32,0,0.01184533288081487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,16,64,0,0.01192533348997434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,16,16,0,0.011861333002646765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,32,1,0,0.013162666310866674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,32,2,0,0.013653332988421122
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,32,4,0,0.012773333738247553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,32,8,0,0.012448000411192576
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,32,16,0,0.012128000458081564
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,32,32,0,0.012576000144084295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,32,64,0,0.012282667060693106
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,32,1,0,0.013114667187134424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,32,2,0,0.013616000612576803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,16,8,0,0.013541333377361298
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,32,8,0,0.012448000411192576
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,32,4,0,0.014901333798964819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,32,16,0,0.012613333761692047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,32,32,0,0.012597333639860153
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,32,64,0,0.012805332740147909
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,64,4,0,0.013738666971524557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,64,1,0,0.014565333724021912
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,64,16,0,0.013514666507641474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,64,8,0,0.013967999567588171
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,64,2,0,0.014389333625634512
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,64,32,0,0.013536000003417334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,64,64,0,0.013487999637921652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,64,2,0,0.01393066719174385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,64,1,0,0.014815999815861383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,64,4,0,0.013898666948080063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,64,8,0,0.014138666292031607
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,64,16,0,0.01505600040157636
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,64,32,0,0.013418667018413544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,64,64,0,0.013642666240533194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,128,1,0,0.01657066618402799
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,128,2,0,0.015781333049138386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,128,8,0,0.015013333410024643
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,128,4,0,0.015589332828919092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,128,16,0,0.015050667027632395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,128,32,0,0.014645333091417948
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,128,64,0,0.015834666788578033
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,128,4,0,0.015594666202863058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,128,8,0,0.017456000049908955
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,128,2,0,0.015642666568358738
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,128,1,0,0.016544000556071598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,128,16,0,0.015082667271296183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,128,32,0,0.01479999969402949
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,128,64,0,0.014650666465361914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,256,1,0,0.019093333433071773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,256,4,0,0.016352000335852306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,256,2,0,0.017450666675964992
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,256,64,0,0.015840000162522
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,256,8,0,0.016389333953460056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,256,16,0,0.015919999529918034
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,256,1,0,0.01930133377512296
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,256,32,0,0.016016000260909397
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,256,4,0,0.0163680004576842
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,256,2,0,0.02882133424282074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,256,8,0,0.016309333344300587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,256,16,0,0.016021333634853363
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,256,64,0,0.015802666544914246
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,256,32,0,0.015781333049138386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,512,2,0,0.021877333521842957
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,512,1,0,0.03196800003449122
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,512,4,0,0.02072000006834666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,512,8,0,0.020117333779732387
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,512,16,0,0.019861333072185516
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,512,32,0,0.019296000401178997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,512,64,0,0.019466667125622433
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,512,1,0,0.03164266546567281
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,512,2,0,0.021888000269730885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,512,4,0,0.020917333662509918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,512,8,0,0.019925333559513092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,512,16,0,0.020202666521072388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,512,32,0,0.01951466624935468
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,512,64,0,0.019466667125622433
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,1024,4,0,0.029391999046007793
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,1024,2,0,0.03941333293914795
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,1024,8,0,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,1024,16,0,0.027098665634791057
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,1024,32,0,0.027477333943049114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,1024,64,0,0.026954665780067444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,1024,1,0,0.06835733354091644
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,1024,1,0,0.06724800169467926
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,1024,8,0,0.028346667687098186
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,1024,2,0,0.03963200002908707
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,1024,32,0,0.02741333345572154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,1024,16,0,0.027327999472618103
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,1024,64,0,0.027002667387326557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,1024,4,0,0.029290666182835896
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,1536,4,0,0.03948266555865606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,1536,8,0,0.036320000886917114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,1536,1,0,0.12525866429011026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,1536,2,0,0.06344533463319142
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,1536,64,0,0.03431999931732813
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,1536,32,0,0.034847999612490334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,1536,16,0,0.03548266738653183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,1536,2,0,0.06432533264160156
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,1536,4,0,0.03881066789229711
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,1536,8,0,0.036490666369597115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,1536,16,0,0.03568000098069509
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,1536,64,0,0.03458133339881897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,1536,1,0,0.1269599994023641
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,1536,32,0,0.03453866640726725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,2048,1,0,0.1926506757736206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,2048,64,0,0.04174399872620901
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,2048,2,0,0.09707199533780415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,2048,4,0,0.054383998115857445
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,2048,32,0,0.04269866645336151
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,2048,16,0,0.04320000112056732
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,2048,8,0,0.04460266729195913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,2048,1,0,0.19197332859039307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,2048,8,0,0.04471466441949209
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,2048,4,0,0.05491200089454651
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,2048,2,0,0.09676800171534221
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,2048,32,0,0.042677332957585655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,2048,16,0,0.04324266811211904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,2048,64,0,0.042080000042915344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,3072,4,0,0.1020906666914622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,3072,16,0,0.059290667374928795
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,3072,32,0,0.05760000149408976
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,3072,2,0,0.20414932568868002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,3072,8,0,0.06378133098284404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,3072,64,0,0.05780800183614095
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,3072,1,0,0.3617333173751831
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,3072,32,0,0.05829866727193197
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,3072,8,0,0.06390400230884552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,3072,16,0,0.05913599828879038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,3072,4,0,0.10268266995747884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,3072,2,0,0.20352532466252646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,3072,64,0,0.057664001981417336
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,3072,1,0,0.36084266503651935
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,4096,32,0,0.07340266803900401
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,4096,16,0,0.07615466912587483
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,4096,8,0,0.08573333422342937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,4096,64,0,0.07313066720962524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,4096,4,0,0.15718400478363037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,4096,2,0,0.3261066675186157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,4096,32,0,0.07358400026957194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,4096,8,0,0.08538666367530823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,4096,16,0,0.0754559983809789
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,4096,4,0,0.15785599748293558
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,4096,1,0,0.5919946829477946
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,4096,64,0,0.07303999861081441
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,4096,2,0,0.3256213267644246
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,4096,1,0,0.5914506514867147
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,6144,64,0,0.10354666908582051
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,6144,16,0,0.11212266484896342
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,6144,32,0,0.10522133111953735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,6144,8,0,0.17686933279037476
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,6144,4,0,0.3614240090052287
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,6144,2,0,0.648144006729126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,6144,32,0,0.10492799679438274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,6144,8,0,0.17721599340438843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,6144,16,0,0.11294933160146077
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,6144,4,0,0.35977065563201904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,6144,64,0,0.1034453312555949
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,6144,2,0,0.6508053143819174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,6144,1,0,1.2469226519266765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,8192,16,0,0.1471733351548513
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,8192,8,0,0.27614933252334595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,8192,32,0,0.1407360037167867
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,8192,64,0,0.13338667154312134
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,6144,1,0,1.2226826349894206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,8192,4,0,0.5990613301595052
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,8192,2,0,1.0963520208994548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,8192,8,0,0.2792746623357137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,8192,4,0,0.5913866758346558
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,8192,32,0,0.13923199971516928
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,8192,16,0,0.14884266257286072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,8192,64,0,0.13383466998736063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,8192,2,0,1.0809866587320964
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,8192,1,0,2.0852373441060386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,10240,8,0,0.422165314356486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,10240,16,0,0.2339466611544291
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,10240,64,0,0.16727999846140543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,10240,32,0,0.17587200800577799
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,10240,4,0,0.8902880350748698
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,8192,1,0,2.0830933252970376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,10240,2,0,1.6455787022908528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,10240,16,0,0.2342133323351542
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,10240,8,0,0.4194186528523763
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,10240,32,0,0.1743626594543457
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,10240,64,0,0.16715200742085776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,10240,4,0,0.8693119684855143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,12288,8,0,0.6804373264312744
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,10240,1,0,3.3094399770100913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,10240,2,0,1.6568053563435872
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,12288,4,0,1.2132426897684734
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,12288,64,0,0.19946666558583578
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,12288,32,0,0.20938666661580405
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,12288,16,0,0.3213760058085124
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,12288,2,0,2.296090602874756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,10240,1,0,3.228783925374349
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,12288,8,0,0.681114673614502
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,12288,16,0,0.3219946622848511
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,12288,32,0,0.20830400784810385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,12288,4,0,1.2278400262196858
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,12288,64,0,0.2010400096575419
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,12288,2,0,2.322927951812744
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,16384,16,0,0.5272906621297201
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,16384,8,0,1.1282506783803303
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,16384,32,0,0.2723840077718099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,16384,4,0,2.057962735493978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,1,16384,64,0,0.2660213311513265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,12288,1,0,4.644576072692871
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,16384,2,0,4.09661324818929
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,12288,1,0,4.757482528686523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,16384,8,0,1.1269973119099934
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,16384,16,0,0.526528000831604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,16384,4,0,2.0674239794413247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,16,1,0,0.014501333236694336
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,16384,32,0,0.2734666665395101
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,16,2,0,0.03562133262554804
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,1,16384,64,0,0.26839999357859295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,16,8,0,0.013061333447694778
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,16,4,0,0.01332266628742218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,16,16,0,0.012362666428089142
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,16,32,0,0.012618667135636011
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,16,64,0,0.012319999436537424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,16,1,0,0.014463999619086584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,16,4,0,0.013157332936922709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,16,2,0,0.013770667215188345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,16384,2,0,3.9537973403930664
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,16,8,0,0.013023999830087027
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,16,16,0,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,16,64,0,0.012346666306257248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,16,32,0,0.012416000167528788
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,32,1,0,0.014901333798964819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,32,2,0,0.013882666826248169
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,32,4,0,0.013557333499193192
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,32,16,0,0.012757333616415659
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,32,8,0,0.012730666746695837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,16384,1,0,8.59990374247233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,32,32,0,0.012613333761692047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,32,64,0,0.012815999488035837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,32,1,0,0.014912000546852747
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,32,4,0,0.01339200014869372
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,32,2,0,0.013999999811251959
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,32,16,0,0.013327999661366144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,32,8,0,0.012847999731699625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,32,32,0,0.012960000584522883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,32,64,0,0.012805332740147909
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,64,1,0,0.01617066686352094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,64,2,0,0.01451733335852623
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,64,8,0,0.01443733274936676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,64,4,0,0.015279999623696009
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,64,16,0,0.013786666095256805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,64,32,0,0.013605333864688873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,64,64,0,0.01522133375207583
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,64,2,0,0.014495999862750372
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,64,1,0,0.01621333385507266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,16384,1,0,8.568293253580729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,64,4,0,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,64,8,0,0.013967999567588171
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,64,16,0,0.013797332843144735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,64,32,0,0.013642666240533194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,64,64,0,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,128,1,0,0.01877333347996076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,128,2,0,0.016544000556071598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,128,4,0,0.015589332828919092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,128,16,0,0.015360000232855478
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,128,8,0,0.01551466683546702
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,128,32,0,0.014554666976133982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,128,64,0,0.014874666929244995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,128,1,0,0.018976000448067982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,128,2,0,0.016117333124081295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,128,4,0,0.01595199977358182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,128,8,0,0.015354666858911514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,128,64,0,0.014736000448465347
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,128,16,0,0.015439999600251516
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,128,32,0,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,256,1,0,0.028149334092934925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,256,4,0,0.01728533332546552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,256,2,0,0.019173332800467808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,256,8,0,0.01710933322707812
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,256,16,0,0.01602666700879733
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,256,32,0,0.015935999651749928
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,256,64,0,0.01626666635274887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,256,1,0,0.028389332195123036
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,256,4,0,0.01743999992807706
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,256,2,0,0.019013332823912304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,256,16,0,0.016719999412695568
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,256,64,0,0.01590399940808614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,256,32,0,0.015834666788578033
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,256,8,0,0.017792000124851864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,512,1,0,0.05648533503214518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,512,4,0,0.022309333086013794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,512,8,0,0.020869334538777668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,512,2,0,0.032586666444937386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,512,16,0,0.02072000006834666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,512,32,0,0.019802667200565338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,512,64,0,0.019653332730134327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,512,1,0,0.0574239989121755
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,512,2,0,0.03230399886767069
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,512,4,0,0.02237333357334137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,512,8,0,0.02093333254257838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,512,32,0,0.020037333170572918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,512,64,0,0.019578666736682255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,512,16,0,0.02046400060256322
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,1024,8,0,0.02972800036271413
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,1024,1,0,0.12572800119717917
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,1024,32,0,0.0281333327293396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,1024,64,0,0.02739199995994568
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,1024,4,0,0.04005333284536997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,1024,2,0,0.06811733543872833
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,1024,16,0,0.028560000161329906
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,1024,4,0,0.04025600105524063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,1024,1,0,0.12589333454767862
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,1024,64,0,0.027808000644048054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,1024,16,0,0.028437333802382152
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,1024,32,0,0.027935999135176342
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,1024,2,0,0.06772266825040181
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,1024,8,0,0.029887999097506206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,1536,4,0,0.06496533254782359
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,1536,32,0,0.03603200117746989
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,1536,16,0,0.03698666642109553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,1536,64,0,0.03555200000603994
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,1536,8,0,0.04037333279848099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,1536,2,0,0.1269866625467936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,1536,1,0,0.22164267301559448
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,1536,8,0,0.04074133435885111
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,1536,64,0,0.035274667044480644
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,1536,16,0,0.03716266651948293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,1536,2,0,0.1272160013516744
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,1536,32,0,0.03612799942493439
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,1536,4,0,0.06491733094056447
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,2048,16,0,0.04576533536116282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,1536,1,0,0.22310932477315268
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,2048,4,0,0.09850666920344035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,2048,64,0,0.04330666859944662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,2048,8,0,0.0557226687669754
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,2048,32,0,0.044026667873064675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,2048,2,0,0.19430400927861533
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,2048,1,0,0.3462560176849365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,2048,16,0,0.04576533536116282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,2048,4,0,0.09873066345850627
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,2048,64,0,0.043178667624791466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,2048,32,0,0.04394666850566864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,2048,8,0,0.056143999099731445
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,2048,2,0,0.19197332859039307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,2048,1,0,0.35115734736124676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,3072,64,0,0.058890665570894875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,3072,8,0,0.10420266787211101
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,3072,32,0,0.06078400214513143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,3072,4,0,0.2084853251775106
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,3072,16,0,0.065610667069753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,3072,2,0,0.36484265327453613
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,3072,16,0,0.06586666901906331
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,3072,8,0,0.10443733135859172
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,3072,64,0,0.05885333319505056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,3072,32,0,0.06081599990526835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,3072,4,0,0.20773865779240927
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,3072,1,0,0.6831680138905843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,4096,32,0,0.0784693310658137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,4096,16,0,0.08759466807047527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,3072,2,0,0.36559466520945233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,4096,8,0,0.15902933478355408
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,4096,4,0,0.32702932755152386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,4096,64,0,0.07474133372306824
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,3072,1,0,0.6948213577270508
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,4096,2,0,0.6055146853129069
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,4096,16,0,0.0881173312664032
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,4096,32,0,0.07932266592979431
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,4096,8,0,0.15875200430552164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,4096,64,0,0.0748533308506012
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,4096,4,0,0.32577067613601685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,4096,1,0,1.1636586983998616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,4096,2,0,0.5971519947052002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,6144,8,0,0.37298667430877686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,6144,32,0,0.11635200182596843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,6144,64,0,0.10932266712188721
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,6144,4,0,0.6486506859461466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,6144,16,0,0.17959467569986978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,4096,1,0,1.1558293501536052
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,6144,2,0,1.235749324162801
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,6144,16,0,0.17947200934092203
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,6144,8,0,0.3657439947128296
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,6144,32,0,0.11749333143234253
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,6144,64,0,0.11028266946474712
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,6144,4,0,0.65174400806427
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,6144,2,0,1.2486826578776042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,8192,16,0,0.28167466322580975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,8192,8,0,0.5987093448638916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,6144,1,0,2.390181382497152
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,8192,32,0,0.1534986694653829
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,8192,64,0,0.14592533310254416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,8192,4,0,1.1029173533121746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,8192,16,0,0.2848373254140218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,8192,8,0,0.5946240027745565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,8192,2,0,2.0803732872009277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,8192,4,0,1.109615961710612
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,6144,1,0,2.433786710103353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,8192,64,0,0.14614400267601013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,8192,32,0,0.15372799833615622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,8192,2,0,2.0917332967122397
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,10240,16,0,0.43142934640248615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,10240,8,0,0.8759520053863525
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,8192,1,0,4.429162661234538
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,10240,64,0,0.18242132663726807
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,10240,32,0,0.24076799551645914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,10240,4,0,1.669066588083903
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,10240,16,0,0.4326080083847046
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,10240,8,0,0.8850879669189453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,8192,1,0,4.2457278569539385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,10240,2,0,3.2190933227539062
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,10240,64,0,0.18121065696080527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,10240,32,0,0.24141865968704224
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,10240,4,0,1.6598079999287922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,12288,16,0,0.6836106777191162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,12288,8,0,1.2400800387064617
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,10240,2,0,3.2436211903889975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,12288,64,0,0.21729065974553427
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,12288,32,0,0.33265600601832074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,12288,4,0,2.3323680559794107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,10240,1,0,6.738298416137695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,12288,16,0,0.6832853158315023
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,12288,8,0,1.228602647781372
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,12288,2,0,4.893962542215983
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,10240,1,0,6.801845550537109
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,12288,32,0,0.3303040067354838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,12288,64,0,0.21663999557495117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,12288,4,0,2.334671974182129
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,16384,16,0,1.1386346817016602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,16384,8,0,2.0845066706339517
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,12288,2,0,4.702975908915202
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,2,16384,64,0,0.2887253363927205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,16384,32,0,0.5352799892425537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,16384,4,0,4.183370590209961
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,12288,1,0,9.578304290771484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,16384,8,0,2.057578722635905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,16384,16,0,1.1257279713948567
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,16384,2,0,8.590714772542318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,2,16384,64,0,0.2837280035018921
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,16384,32,0,0.5293226639429728
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,12288,1,0,9.782623926798502
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,16,1,0,0.01842133328318596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,16384,4,0,4.088831901550293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,16,2,0,0.014922666052977243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,16,4,0,0.013845333208640417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,16,8,0,0.013536000003417334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,16,16,0,0.01440000037352244
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,16,32,0,0.012560000022252401
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,16,64,0,0.01249066616098086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,16,1,0,0.018629333625237148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,16,2,0,0.01441066712141037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,16,4,0,0.01392000044385592
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,16,8,0,0.013306666165590286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,16,16,0,0.012800000607967377
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,16,32,0,0.01258133351802826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,16,64,0,0.012565333396196365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,32,1,0,0.018138666947682697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,32,2,0,0.014901333798964819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,32,4,0,0.013946666071812311
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,32,8,0,0.013605333864688873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,32,16,0,0.013269333789745966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,32,32,0,0.012938667088747025
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,16384,2,0,8.377301534016928
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,32,64,0,0.012645332763592402
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,32,1,0,0.018351999421914417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,32,2,0,0.014831999937693277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,32,4,0,0.01392000044385592
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,32,8,0,0.013669333110253016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,32,16,0,0.013290667285521826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,32,32,0,0.012757333616415659
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,32,64,0,0.012645332763592402
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,64,1,0,0.020165332903464634
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,64,2,0,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,64,4,0,0.014864000181357065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,64,8,0,0.014426667243242264
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,64,16,0,0.014101333916187286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,64,32,0,0.014032000054915747
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,64,64,0,0.013674666484196981
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,64,1,0,0.0201706662774086
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,64,2,0,0.016154666741689045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,64,4,0,0.014741333822409311
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,64,8,0,0.01423466702302297
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,64,16,0,0.014042666802803675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,64,32,0,0.013829333086808523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,64,64,0,0.013642666240533194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,16384,1,0,17.231610616048176
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,128,1,0,0.02773333340883255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,128,2,0,0.018816000471512478
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,128,16,0,0.015605332950750986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,128,8,0,0.01594666639963786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,128,4,0,0.016437333077192307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,128,32,0,0.01524266724785169
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,128,64,0,0.015061333775520325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,128,1,0,0.02714666724205017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,128,4,0,0.0164533331990242
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,128,2,0,0.018725333114465077
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,128,8,0,0.015957333147525787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,128,16,0,0.015423999478419622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,128,64,0,0.015103999525308609
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,128,32,0,0.015226667126019796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,256,1,0,0.0521066685517629
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,256,2,0,0.028490667541821797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,256,64,0,0.01609066625436147
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,256,16,0,0.017114666601022083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,256,32,0,0.016415999581416447
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,256,8,0,0.017850667238235474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,256,4,0,0.019194666296243668
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,256,2,0,0.028560000161329906
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,256,8,0,0.017344000438849132
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,256,1,0,0.05167999863624573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,256,64,0,0.016010666886965435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,256,32,0,0.01629866659641266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,256,4,0,0.01907733331123988
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,256,16,0,0.01691199963291486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,512,4,0,0.032831999162832894
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,512,32,0,0.020901332298914593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,512,8,0,0.02250133454799652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,512,2,0,0.05719999969005585
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,512,16,0,0.021530665457248688
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,512,64,0,0.019930666933457058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,512,1,0,0.09286399682362874
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,512,32,0,0.020373333245515823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,512,64,0,0.020293333878119785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,512,4,0,0.03314133236805598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,512,8,0,0.02250666668017705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,512,1,0,0.09219200412432353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,512,16,0,0.021509334444999695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,512,2,0,0.05689600110054016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,16384,1,0,17.291642506917317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,1024,32,0,0.02916266769170761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,1024,64,0,0.028704000016053517
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,1024,16,0,0.03038399914900462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,1024,8,0,0.041290665666262306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,1024,4,0,0.06979733208815257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,1024,2,0,0.12596266468365988
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,1024,32,0,0.02920000006755193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,1024,8,0,0.0415786678592364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,1024,4,0,0.06914133330186208
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,1024,16,0,0.030602666238943737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,1024,64,0,0.040949332217375435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,1024,1,0,0.22926400105158487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,1024,2,0,0.12610666950543722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,1024,1,0,0.22987733284632364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,1536,32,0,0.03839999934037527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,1536,16,0,0.04278400043646494
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,1536,64,0,0.036917333801587425
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,1536,8,0,0.06730666756629944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,1536,4,0,0.1290880044301351
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,1536,2,0,0.22484799226125082
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,1536,32,0,0.03824000060558319
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,1536,16,0,0.0430026650428772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,1536,8,0,0.06693333387374878
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,1536,64,0,0.03706666578849157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,1536,4,0,0.12852266430854797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,1536,2,0,0.22730666399002075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,1536,1,0,0.4170773426691691
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,2048,16,0,0.058037335673967995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,2048,8,0,0.10129599769910176
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,2048,32,0,0.048751999934514366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,2048,4,0,0.19624000787734985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,2048,64,0,0.04520000020662943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,1536,1,0,0.42586131890614826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,2048,32,0,0.048437332113583885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,2048,16,0,0.05829866727193197
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,2048,8,0,0.10162132978439331
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,2048,2,0,0.3548159996668498
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,2048,4,0,0.19804799556732178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,2048,64,0,0.04515733321507772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,2048,2,0,0.35210132598876953
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,2048,1,0,0.6705333391825358
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,3072,16,0,0.10961600144704182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,3072,8,0,0.21294933557510376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,3072,32,0,0.06963733335336049
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,3072,64,0,0.06585599978764851
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,3072,4,0,0.37373332182566327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,2048,1,0,0.6707786719004313
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,3072,2,0,0.6960960229237875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,3072,8,0,0.2108746568361918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,3072,32,0,0.06930133203665416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,3072,16,0,0.10915733377138774
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,3072,64,0,0.06471466521422069
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,3072,4,0,0.36961066722869873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,3072,2,0,0.7058506806691488
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,4096,16,0,0.16583466529846191
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,4096,8,0,0.3344693183898926
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,4096,32,0,0.09373866518338521
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,3072,1,0,1.3371040026346843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,4096,64,0,0.08523199955622356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,4096,4,0,0.6098613341649374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,3072,1,0,1.3685493469238281
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,4096,2,0,1.1463359991709392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,4096,16,0,0.166703999042511
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,4096,8,0,0.33196266492207843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,4096,32,0,0.09524800380071004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,4096,4,0,0.59826131661733
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,4096,64,0,0.08515733480453491
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,6144,16,0,0.37166933218638104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,4096,2,0,1.168943961461385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,6144,8,0,0.6589813232421875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,4096,1,0,2.249743938446045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,6144,32,0,0.18818666537602743
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,6144,64,0,0.12446399529774983
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,6144,4,0,1.2566239833831787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,4096,1,0,2.264106591542562
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,6144,16,0,0.37808001041412354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,6144,8,0,0.6673226356506348
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,6144,64,0,0.12528000275293985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,6144,32,0,0.18854933977127075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,6144,2,0,2.4333173433939614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,6144,4,0,1.2348159948984783
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,8192,16,0,0.6000533501307169
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,8192,8,0,1.100826660792033
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,6144,2,0,2.410378615061442
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,4,8192,64,0,0.162800004084905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,8192,32,0,0.2924426595369975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,8192,4,0,2.105658690134684
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,8192,8,0,1.1048373381296794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,6144,1,0,5.110143979390462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,8192,2,0,4.2145334879557295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,6144,1,0,4.719578742980957
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,8192,32,0,0.29553600152333576
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,8192,16,0,0.6073280175526937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,8192,4,0,2.1001173655192056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,16,2,0,0.018426666657129925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,16,1,0,0.024336000283559162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,4,8192,64,0,0.16480533281962076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,16,4,0,0.01458666721979777
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,16,8,0,0.013882666826248169
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,16,16,0,0.01332266628742218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,16,32,0,0.012560000022252401
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,16,64,0,0.012863999853531519
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,16,1,0,0.02442666639884313
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,16,2,0,0.01850133389234543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,16,4,0,0.0262719988822937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,16,8,0,0.013466666142145792
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,16,16,0,0.013354666531085968
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,16,32,0,0.013045333325862885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,16,64,0,0.012565333396196365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,32,2,0,0.01781333362062772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,32,4,0,0.014864000181357065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,32,1,0,0.024538666009902954
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,32,8,0,0.013776000589132309
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,32,16,0,0.013616000612576803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,32,32,0,0.01328533391157786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,32,64,0,0.013093333691358566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,32,1,0,0.024559999505678814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,32,2,0,0.017808000246683758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,32,4,0,0.01471466695268949
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,32,8,0,0.013818666338920593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,32,16,0,0.026922665536403656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,32,32,0,0.01320533330241839
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,8192,2,0,4.318517367045085
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,32,64,0,0.012703999876976013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,64,2,0,0.019968000551064808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,64,8,0,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,64,4,0,0.016000000139077503
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,64,1,0,0.030245333909988403
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,64,16,0,0.014757333944241205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,64,32,0,0.01431999976436297
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,64,64,0,0.013834666460752487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,64,16,0,0.01461333284775416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,64,4,0,0.01594666639963786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,64,2,0,0.020213333268960316
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,64,8,0,0.014767999450365702
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,64,1,0,0.030069333811601002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,64,32,0,0.014229333649079004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,64,64,0,0.014005333185195923
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,128,16,0,0.016149333367745083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,128,4,0,0.020527999848127365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,128,2,0,0.028778667251269024
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,128,1,0,0.04784533381462097
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,128,32,0,0.015520000209410986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,128,8,0,0.016415999581416447
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,128,64,0,0.015322666615247726
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,8192,1,0,8.980901082356771
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,128,4,0,0.018709332992633183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,128,8,0,0.016421332955360413
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,128,2,0,0.028549333413441975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,128,1,0,0.04789866507053375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,128,32,0,0.015578666081031164
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,128,16,0,0.015962666521469753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,8192,1,0,8.904122670491537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,128,64,0,0.015722667177518208
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,256,4,0,0.029631999631722767
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,256,16,0,0.018629333625237148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,256,8,0,0.019610666980346043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,256,64,0,0.017008000363906223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,256,2,0,0.052069331208864846
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,256,32,0,0.017909333109855652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,256,1,0,0.07852800190448761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,256,8,0,0.01960533360640208
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,256,4,0,0.029738667110602062
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,256,16,0,0.01802666609485944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,256,32,0,0.017770666629076004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,256,2,0,0.053317333261171974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,256,1,0,0.07858133316040039
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,256,64,0,0.016810666769742966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,512,8,0,0.03367999941110611
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,512,16,0,0.023525332411130268
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,512,32,0,0.02319466571013133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,512,64,0,0.021349333226680756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,512,4,0,0.05946133534113566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,512,8,0,0.03430933256944021
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,512,1,0,0.16929600636164346
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,512,32,0,0.021856000026067097
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,512,4,0,0.0591786652803421
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,512,16,0,0.02347733328739802
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,512,2,0,0.09434133768081665
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,512,2,0,0.09316800038019817
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,512,64,0,0.021375998854637146
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,512,1,0,0.16856533288955688
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,1024,64,0,0.03053866575161616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,1024,16,0,0.04418666660785675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,1024,32,0,0.03398933261632919
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,1024,4,0,0.12971733013788858
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,1024,8,0,0.07287466526031494
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,1024,2,0,0.22884267568588257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,1024,16,0,0.04428266485532125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,1024,32,0,0.034373333056767784
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,1024,8,0,0.07259733478228252
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,1024,4,0,0.12960533301035562
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,1024,64,0,0.03090133269627889
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,1024,2,0,0.2323253353436788
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,1536,16,0,0.07186133166154225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,1536,8,0,0.13412266969680786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,1536,32,0,0.04714666803677877
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,1024,1,0,0.4327626625696818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,1536,4,0,0.22935465971628824
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,1536,64,0,0.04308266441027323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,1536,16,0,0.07175466914971669
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,1536,2,0,0.42977599302927655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,1536,8,0,0.13351466258366904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,1024,1,0,0.4385226567586263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,1536,4,0,0.22815465927124023
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,1536,32,0,0.0468746672074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,1536,64,0,0.042992000778516136
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,1536,2,0,0.4292213519414266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,2048,8,0,0.1995840072631836
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,2048,16,0,0.10816533366839091
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,1536,1,0,0.8238133589426676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,2048,32,0,0.0642986645301183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,2048,64,0,0.05559466779232025
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,2048,4,0,0.35660266876220703
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,1536,1,0,0.8174560070037842
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,2048,16,0,0.10735999544461568
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,2048,8,0,0.2009119987487793
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,2048,32,0,0.06384533147017162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,2048,2,0,0.6856213410695394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,2048,4,0,0.3616480032602946
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,2048,64,0,0.05553600192070007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,3072,16,0,0.21850667397181192
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,2048,2,0,0.6801333427429199
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,3072,8,0,0.3840906620025635
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,3072,32,0,0.11611200372378032
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,3072,64,0,0.07771733403205872
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,3072,4,0,0.7118399937947592
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,2048,1,0,1.3214560349782307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,3072,16,0,0.21855467557907104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,2048,1,0,1.3434826532999675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,3072,2,0,1.3522879282633464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,3072,8,0,0.3811466693878174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,3072,32,0,0.11659199992815654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,3072,64,0,0.07809600234031677
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,3072,4,0,0.706117312113444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,4096,16,0,0.34245868523915607
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,3072,2,0,1.3508960405985515
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,4096,8,0,0.622170646985372
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,4096,32,0,0.17665600776672363
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,8,4096,64,0,0.10406933228174846
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,4096,4,0,1.1603519916534424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,3072,1,0,2.6654346783955893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,4096,8,0,0.6121813456217448
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,4096,16,0,0.3424533208211263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,4096,4,0,1.155776023864746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,4096,32,0,0.1764799952507019
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,8,4096,64,0,0.10454932848612468
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,3072,1,0,2.709301312764486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,16,1,0,0.03589333345492681
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,16,2,0,0.024069334069887798
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,16,4,0,0.01754666616519292
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,16,8,0,0.03305600086847941
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,4096,2,0,2.2854933738708496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,16,16,0,0.014954666296641031
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,16,16,64,0,0.013237333546082178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,16,32,0,0.013354666531085968
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,16,1,0,0.035829332967599235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,16,2,0,0.02388266722361247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,16,4,0,0.017594666530688603
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,16,8,0,0.014554666976133982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,16,16,0,0.013765333841244379
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,16,32,0,0.01340266689658165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,16,16,64,0,0.01303999995191892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,32,1,0,0.039359999199708305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,32,2,0,0.02422933280467987
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,32,4,0,0.017637333522240322
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,32,8,0,0.014671999961137772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,4096,2,0,2.2646239598592124
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,32,16,0,0.014074667046467463
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,32,32,0,0.013514666507641474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,16,32,64,0,0.013343999783198038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,32,2,0,0.02462933212518692
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,32,4,0,0.01800000046690305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,32,8,0,0.014576000471909841
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,32,1,0,0.03951466580231985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,32,32,0,0.013525333255529404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,32,16,0,0.0138026662170887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,16,32,64,0,0.013541333377361298
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,64,2,0,0.030432000756263733
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,64,8,0,0.016565332810084026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,64,1,0,0.04974933465321859
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,64,4,0,0.045066664616266884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,64,32,0,0.014554666976133982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,64,16,0,0.014869333555301031
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,4096,1,0,4.761018753051758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,16,64,64,0,0.014346666634082794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,64,2,0,0.030250666042168934
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,64,8,0,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,64,16,0,0.014906667172908783
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,64,4,0,0.02033599962790807
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,64,1,0,0.04948266843954722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,64,32,0,0.014639999717473984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,16,64,64,0,0.014426667243242264
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,128,16,0,0.017029333859682083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,128,8,0,0.019973333925008774
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,128,32,0,0.016965333372354507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,16,128,64,0,0.015781333049138386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,128,2,0,0.05087466537952423
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,128,4,0,0.02890666574239731
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,128,1,0,0.0841493308544159
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,128,4,0,0.02940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,128,8,0,0.035002666215101876
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,128,16,0,0.0169813334941864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,128,32,0,0.016762666404247284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,16,128,64,0,0.021365332106749218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,128,2,0,0.04898133377234141
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,128,1,0,0.08380267024040222
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,16,256,64,0,0.0182239996890227
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,4096,1,0,4.567370732625325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,256,32,0,0.023077333966890972
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,256,16,0,0.021615999440352123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,256,8,0,0.030943999687830608
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,256,2,0,0.0798826664686203
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,256,4,0,0.0545653353134791
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,256,8,0,0.0313226655125618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,256,32,0,0.018794666975736618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,256,1,0,0.14102400342623392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,256,4,0,0.05365333457787832
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,256,16,0,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,16,256,64,0,0.018277333428462345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,256,2,0,0.08258666594823201
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,512,16,0,0.03728000074625015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,512,32,0,0.02701866626739502
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,16,512,64,0,0.02351466566324234
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,256,1,0,0.14078399538993835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,512,8,0,0.06321600079536438
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,512,4,0,0.09711466232935588
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,512,8,0,0.06294933458169301
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,512,32,0,0.027136000494162243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,512,16,0,0.03718933214743932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,512,4,0,0.09795733292897542
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,512,2,0,0.17196800311406454
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,16,512,64,0,0.02350933353106181
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,512,2,0,0.1715573271115621
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,512,1,0,0.31836267312367755
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,1024,16,0,0.07872533301512401
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,1024,32,0,0.05065066615740458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,1024,8,0,0.13691199819246927
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,16,1024,64,0,0.04106666644414266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,512,1,0,0.3172373374303182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,1024,4,0,0.2385173241297404
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,1024,32,0,0.050586665670077004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,1024,8,0,0.13597866892814636
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,1024,16,0,0.07889600098133087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,1024,2,0,0.43930665651957196
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,16,1024,64,0,0.040976000328858696
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,1024,4,0,0.2392373283704122
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,1024,2,0,0.4460373322168986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,1536,16,0,0.1425226628780365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,1536,8,0,0.23930666844050089
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,16,1536,64,0,0.0555626650651296
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,1024,1,0,0.8502026398976644
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,1536,32,0,0.08082666496435802
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,1536,4,0,0.43159464995066327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,1536,8,0,0.23986667394638062
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,1536,16,0,0.14094932874043783
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,1536,2,0,0.8266293207804362
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,1024,1,0,0.8466560045878092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,1536,4,0,0.4320160150527954
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,1536,32,0,0.08075200021266937
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,16,1536,64,0,0.0555626650651296
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,2048,8,0,0.3726400136947632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,2048,16,0,0.21136534214019775
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,1536,2,0,0.8406559626261393
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,2048,32,0,0.11870400110880534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,16,2048,64,0,0.07517333328723907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,2048,4,0,0.6870880126953125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,1536,1,0,1.6291413307189941
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,2048,8,0,0.3672586679458618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,1536,1,0,1.650800069173177
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,2048,16,0,0.21290133396784464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,2048,2,0,1.3207733631134033
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,16,2048,64,0,0.0764160007238388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,16,1,0,0.06057600180308024
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,16,2,0,0.03505066782236099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,2048,32,0,0.1176479955514272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,16,4,0,0.02346666653951009
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,16,8,0,0.01752000053723653
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,2048,4,0,0.6881439685821533
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,16,16,0,0.014485333114862442
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,16,32,0,0.013674666484196981
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,32,16,64,0,0.013530666629473368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,16,2,0,0.035445332527160645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,16,4,0,0.02332799881696701
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,16,8,0,0.01754133279124896
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,16,1,0,0.0611413319905599
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,16,16,0,0.014368000129858652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,16,32,0,0.013701333353916803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,32,16,64,0,0.013514666507641474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,32,2,0,0.039808000127474465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,32,4,0,0.024490666886170704
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,32,8,0,0.018133333573738735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,32,1,0,0.06764799853165944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,32,16,0,0.015024000157912573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,32,32,0,0.014064000298579534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,32,32,64,0,0.013749333719412485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,2048,2,0,1.3542240460713704
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,32,4,0,0.024495999018351238
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,32,2,0,0.03977066775163015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,32,8,0,0.018191999445358913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,32,16,0,0.01492799942692121
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,32,1,0,0.06694933275381725
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,32,32,0,0.0141546664138635
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,32,32,64,0,0.01392000044385592
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,64,4,0,0.031173333525657654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,64,8,0,0.0206986665725708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,64,16,0,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,64,2,0,0.05006400247414907
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,64,32,0,0.015541333705186844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,64,1,0,0.08566932876904805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,32,64,64,0,0.014869333555301031
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,64,4,0,0.031290667752424874
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,64,8,0,0.020917333662509918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,64,2,0,0.05023466547330221
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,64,16,0,0.01681600014368693
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,64,32,0,0.015194666882356008
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,2048,1,0,2.6581172943115234
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,64,1,0,0.08597333232561748
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,32,64,64,0,0.015301333119471868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,128,8,0,0.030821333328882854
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,128,16,0,0.020687999824682873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,128,32,0,0.017994667092959087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,32,128,64,0,0.017525333911180496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,128,4,0,0.05014933149019877
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,128,2,0,0.08825066685676575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,128,16,0,0.020303999384244282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,128,8,0,0.03065066784620285
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,128,32,0,0.01788266624013583
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,128,4,0,0.04974400003751119
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,128,1,0,0.15357333421707153
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,128,2,0,0.08575999736785889
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,32,128,64,0,0.017781333376963932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,128,1,0,0.15495999654134116
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,256,8,0,0.05754133562246958
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,256,16,0,0.03533866753180822
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,256,32,0,0.02366400013367335
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,256,4,0,0.08353599905967712
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,32,256,64,0,0.020527999848127365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,256,2,0,0.1432906687259674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,256,8,0,0.05771199862162272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,256,16,0,0.03457599878311157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,256,4,0,0.08388266960779826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,256,32,0,0.023887999355793
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,32,256,64,0,0.02146133283774058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,256,2,0,0.14225600163141885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,256,1,0,0.2640639940897624
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,512,16,0,0.06986133257548015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,512,8,0,0.10284266869227092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,256,1,0,0.26321067412694293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,512,32,0,0.04394133388996124
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,2048,1,0,2.6703414916992188
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,512,4,0,0.1779359976450602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,32,512,64,0,0.033930666744709015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,512,32,0,0.043978666265805565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,512,16,0,0.07042666773001353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,512,8,0,0.10504532853762309
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,512,2,0,0.3221226731936137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,32,512,64,0,0.0346666673819224
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,512,4,0,0.17690666516621908
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,512,2,0,0.3218933343887329
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,1024,16,0,0.14680000146230063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,1024,8,0,0.2463573416074117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,32,1024,64,0,0.0617439995209376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,1024,32,0,0.09078933795293172
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,512,1,0,0.6269546747207642
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,1024,4,0,0.44835734367370605
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,1024,16,0,0.1461120049158732
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,512,1,0,0.6183626651763916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,1024,8,0,0.2525706688563029
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,1024,32,0,0.09010133147239685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,32,1024,64,0,0.062314664324124656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,16,1,0,0.10812800129254659
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,1024,4,0,0.4465866486231486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,16,4,0,0.03492266684770584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,16,2,0,0.060778667529424034
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,16,8,0,0.02382933348417282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,16,32,0,0.014576000471909841
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,16,16,0,0.0235359991590182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,64,16,64,0,0.013834666460752487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,16,4,0,0.035216001172860466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,16,2,0,0.06112533311049143
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,1024,2,0,0.874293327331543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,16,1,0,0.10819733142852783
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,16,8,0,0.0235359991590182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,16,16,0,0.017583999782800674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,16,32,0,0.014730667074521383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,64,16,64,0,0.01394133393963178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,32,2,0,0.068271999557813
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,32,8,0,0.024869332710901897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,32,4,0,0.04015466570854187
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,1024,2,0,0.8661386966705322
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,32,16,0,0.0183146670460701
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,32,1,0,0.12427733341852824
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,64,32,64,0,0.014282666146755219
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,32,32,0,0.01504533365368843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,32,4,0,0.040789333482583366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,32,8,0,0.02491733431816101
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,32,16,0,0.018351999421914417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,32,32,0,0.01515199989080429
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,32,2,0,0.06842666864395142
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,32,1,0,0.12324800093968709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,64,32,64,0,0.0145066666106383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,64,16,0,0.02141333371400833
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,64,4,0,0.05153066913286845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,64,8,0,0.03270400067170461
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,64,32,0,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,64,64,64,0,0.01658133293191592
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,64,2,0,0.08703999718030293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,64,1,0,0.15850133697191873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,64,4,0,0.05162666738033295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,64,8,0,0.03297599901755651
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,64,16,0,0.021381333470344543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,64,32,0,0.017808000246683758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,1024,1,0,1.7354346911112468
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,64,64,64,0,0.016154666741689045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,64,2,0,0.086709330479304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,64,1,0,0.15691733360290527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,128,16,0,0.03465600063403448
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,64,128,64,0,0.019578666736682255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,128,8,0,0.05644266804059347
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,128,32,0,0.023503998915354412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,128,4,0,0.09154666463534038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,128,16,0,0.03457599878311157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,128,8,0,0.053130666414896645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,128,4,0,0.09072533249855042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,128,2,0,0.1567626694838206
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,128,32,0,0.023733332753181458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,1024,1,0,1.7017173767089844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,64,128,64,0,0.019653332730134327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,128,2,0,0.1568106710910797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,128,1,0,0.28700800736745197
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,256,32,0,0.04257600009441376
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,64,256,64,0,0.03192000091075897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,256,16,0,0.06458666423956554
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,256,8,0,0.09268800417582194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,128,1,0,0.2879040042559306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,256,4,0,0.1495466629664103
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,256,8,0,0.088837335507075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,256,32,0,0.04289066791534424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,256,16,0,0.064410666624705
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,256,4,0,0.14909866452217102
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,256,2,0,0.26709866523742676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,64,256,64,0,0.03218133250872294
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,256,2,0,0.2675306598345439
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,512,16,0,0.1134986678759257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,512,8,0,0.1874026656150818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,512,32,0,0.08158933122952779
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,256,1,0,0.5099733273188273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,64,512,64,0,0.05542399982611338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,512,4,0,0.333840012550354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,256,1,0,0.5079520146052042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,512,8,0,0.18760534127553305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,512,16,0,0.11378666758537292
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,512,32,0,0.08195200065771739
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,64,512,64,0,0.05541866521040598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,128,16,2,0,0.10911466677983601
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,512,4,0,0.33630398909250897
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,128,16,1,0,0.22802666823069254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,128,16,4,0,0.06106133262316386
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,128,16,8,0,0.0353973334034284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,128,16,16,0,0.02407466620206833
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,512,2,0,0.6354666550954183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,128,16,64,0,0.01504533365368843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,128,16,32,0,0.01810666670401891
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,128,16,16,0,0.023941333095232647
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,128,16,2,0,0.10817600289980571
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,128,16,8,0,0.035418666899204254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,128,16,4,0,0.06187733511130015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,128,16,1,0,0.22913599014282227
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,128,16,32,0,0.018085333208243053
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,128,16,64,0,0.015109332899252573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,512,2,0,0.6254719893137614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,128,32,4,0,0.0703306645154953
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,128,32,8,0,0.04205333193143209
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,128,32,16,0,0.02584533393383026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,128,32,2,0,0.12381333112716675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,128,32,32,0,0.019215999792019527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,128,32,64,0,0.016250666230916977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,128,32,1,0,0.24194133281707764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,128,32,8,0,0.04223999877770742
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,128,32,4,0,0.0701279987891515
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,128,32,16,0,0.026144000391165417
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,128,32,2,0,0.12502933541933695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,128,32,32,0,0.01939733326435089
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,128,32,64,0,0.01613333324591319
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,128,32,1,0,0.2416693369547526
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,128,64,8,0,0.055061335364977516
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,512,1,0,1.257759968439738
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,128,64,16,0,0.03615466753641764
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,128,64,4,0,0.09089600046475728
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,128,64,32,0,0.02463999887307485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,128,64,64,0,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,128,64,2,0,0.1585599978764852
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,128,64,8,0,0.05486399928728739
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,128,64,16,0,0.03628266602754593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,128,64,32,0,0.024314666787783306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,128,64,4,0,0.09021332859992981
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,128,64,64,0,0.019823999454577763
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,128,64,2,0,0.15807466705640158
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,128,64,1,0,0.3007253408432007
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,128,128,8,0,0.09456533193588257
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,128,128,16,0,0.060319999853769936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,128,128,32,0,0.04253333310286204
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,128,64,1,0,0.3004639943440755
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,128,128,4,0,0.16357333461443582
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,128,128,64,0,0.03205333401759466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,512,1,0,1.2385066350301106
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,128,128,16,0,0.060234665870666504
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,128,128,2,0,0.29798932870229083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,128,128,8,0,0.09692266583442688
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,128,128,32,0,0.044351999958356224
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,128,128,4,0,0.16343999902407327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,128,128,64,0,0.032138665517171226
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,128,256,16,0,0.10007466872533162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,128,128,2,0,0.2986666758855184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,128,128,1,0,0.5565760135650635
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,128,256,8,0,0.16022933522860208
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,128,256,32,0,0.07684800028800964
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,128,256,64,0,0.055733333031336464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,128,256,4,0,0.2783520023028056
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,128,128,1,0,0.5580426851908366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,128,256,16,0,0.09888533751169841
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,128,256,32,0,0.0767680009206136
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,128,256,8,0,0.16128533085187277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,128,256,64,0,0.056314667065938316
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,128,256,2,0,0.5189119974772135
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,128,256,4,0,0.27796266476313275
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,256,16,4,0,0.11053867141405742
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,256,16,2,0,0.22756266593933105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,256,16,8,0,0.06383466720581055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,256,16,16,0,0.036501333117485046
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,256,16,1,0,0.4659786621729533
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,256,16,32,0,0.02518933266401291
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,256,16,64,0,0.01923199991385142
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,256,16,4,0,0.10994133353233337
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,256,16,8,0,0.06351466476917267
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,256,16,16,0,0.036992001036802925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,256,16,2,0,0.23231999079386392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,128,256,2,0,0.5167946815490723
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,256,16,1,0,0.47445865472157794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,256,16,64,0,0.01929066702723503
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,256,16,32,0,0.02489600082238515
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,256,32,32,0,0.029194665451844532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,256,32,16,0,0.04865066707134247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,256,32,8,0,0.07314133147398631
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,256,32,4,0,0.12667199969291687
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,256,32,64,0,0.02139200021823247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,256,32,2,0,0.2480319937070211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,256,32,16,0,0.044735997915267944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,256,32,8,0,0.07276799778143565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,256,32,4,0,0.1288693348566691
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,128,256,1,0,1.0121866861979167
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,256,32,32,0,0.02920000006755193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,256,32,2,0,0.2465226650238037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,256,32,64,0,0.021488000949223835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,256,32,1,0,0.49033065636952716
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,256,64,16,0,0.06205866734186808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,256,64,32,0,0.043791999419530235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,256,32,1,0,0.4898560047149658
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,256,64,8,0,0.09662399689356486
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,256,64,64,0,0.03292799989382426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,256,64,4,0,0.16517866651217142
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,128,256,1,0,1.0121493339538574
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,256,64,16,0,0.06180266539255778
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,256,64,8,0,0.096778670946757
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,256,64,2,0,0.3012053370475769
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,256,64,32,0,0.04531733194986979
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,256,64,4,0,0.16690133015314737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,256,64,64,0,0.03315199911594391
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,256,64,2,0,0.3017706672350566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,256,128,16,0,0.1071573297182719
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,256,128,8,0,0.1767253279685974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,256,64,1,0,0.5883040030797323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,256,128,32,0,0.07402666906515758
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,1,256,128,64,0,0.05459733307361603
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,256,128,4,0,0.2999946673711141
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,256,64,1,0,0.5870506763458252
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,256,128,16,0,0.10737599929173787
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,256,128,8,0,0.17347200711568198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,16,1,0,0.015050667027632395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,1,256,128,64,0,0.0552106648683548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,256,128,32,0,0.07363733152548473
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,256,128,2,0,0.5790506601333618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,16,2,0,0.013701333353916803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,256,128,4,0,0.3015999992688497
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,16,4,0,0.013349333157142004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,16,8,0,0.013141332815090815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,16,16,0,0.012810666114091873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,16,32,0,0.012293333808581034
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,16,64,0,0.012624000509579977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,16,1,0,0.015301333119471868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,16,2,0,0.014181333283583323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,16,4,0,0.013274667163689932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,16,8,0,0.013141332815090815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,16,16,0,0.012831999609867731
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,16,32,0,0.012144000579913458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,16,64,0,0.012367999802033106
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,32,1,0,0.014805333067973455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,32,2,0,0.013818666338920593
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,32,4,0,0.013807999591032663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,32,8,0,0.01321600005030632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,32,16,0,0.01301866645614306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,32,32,0,0.012847999731699625
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,32,64,0,0.012543999900420507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,32,1,0,0.015072000523408255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,32,2,0,0.013893333574136099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,32,4,0,0.013754667093356451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,32,8,0,0.013232000172138214
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,256,128,2,0,0.5957119862238566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,32,16,0,0.014432000617186228
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,32,32,0,0.012650666137536367
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,32,64,0,0.01268799975514412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,64,1,0,0.016010666886965435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,64,2,0,0.014629332969586054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,64,4,0,0.013962666193644205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,64,8,0,0.014074667046467463
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,64,16,0,0.014181333283583323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,64,32,0,0.01357866699496905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,64,64,0,0.03766933331886927
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,64,1,0,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,64,2,0,0.0145066666106383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,64,4,0,0.014485333114862442
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,64,8,0,0.014159999787807465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,64,16,0,0.013893333574136099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,64,32,0,0.013482666263977686
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,64,64,0,0.013477332890033722
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,128,1,0,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,128,2,0,0.016229332735141117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,128,4,0,0.015664000064134598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,128,8,0,0.015370666980743408
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,128,16,0,0.01505600040157636
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,128,32,0,0.015087999403476715
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,128,64,0,0.014325333138306936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,128,1,0,0.01905599981546402
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,128,2,0,0.016469333320856094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,128,4,0,0.015573333948850632
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,128,8,0,0.015306666493415833
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,128,16,0,0.014991999914248785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,128,32,0,0.014885333677132925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,128,64,0,0.014671999961137772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,256,2,0,0.018842666099468868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,256,1,0,0.030826665461063385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,256,4,0,0.017978666971127193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,256,8,0,0.017301333447297413
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,256,16,0,0.01575999955336253
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,256,32,0,0.01584533353646596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,256,64,0,0.015791999797026317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,256,4,0,0.017642666896184284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,256,1,0,0.02863999952872594
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,256,2,0,0.01903466631968816
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,256,8,0,0.0162773331006368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,256,16,0,0.016384000579516094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,256,32,0,0.01590399940808614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,256,64,0,0.015743999431530636
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,256,128,1,0,1.1010826428731282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,512,8,0,0.020773333807786305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,512,4,0,0.02204799900452296
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,512,2,0,0.03161599983771642
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,512,16,0,0.020330666253964107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,512,32,0,0.020015999674797058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,512,64,0,0.019839999576409657
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,512,1,0,0.05570666491985321
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,512,8,0,0.021087999145189922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,512,2,0,0.031119999786218006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,512,4,0,0.02202133337656657
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,512,16,0,0.020175999651352566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,512,32,0,0.0199946661790212
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,512,64,0,0.019909333437681198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,256,128,1,0,1.1268640359242756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,512,1,0,0.055904000997543335
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,1024,16,0,0.02828799933195114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,1024,8,0,0.029578665892283123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,1024,4,0,0.038949333131313324
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,1024,32,0,0.027808000644048054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,1024,64,0,0.027322667340437572
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,1024,2,0,0.06743466854095459
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,1024,1,0,0.12471466263135274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,1024,4,0,0.039674667020638786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,1024,32,0,0.027610667049884796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,1024,16,0,0.02865600089232127
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,1024,64,0,0.027082666754722595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,1024,8,0,0.02938133229811986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,1024,1,0,0.12503467003504434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,1024,2,0,0.06795200208822887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,1536,32,0,0.035402665535608925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,1536,64,0,0.0349440003434817
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,1536,16,0,0.03620799879233042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,1536,2,0,0.12549866239229837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,1536,4,0,0.06436799963315327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,1536,8,0,0.038933334251244865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,1536,1,0,0.22281599044799805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,1536,16,0,0.03665599972009659
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,1536,32,0,0.035402665535608925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,1536,8,0,0.03858133405447006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,1536,64,0,0.034703999757766724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,1536,4,0,0.06440000236034393
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,1536,1,0,0.22168533007303873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,1536,2,0,0.1267680029074351
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,2048,16,0,0.044581333796183266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,2048,32,0,0.04318400224049886
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,2048,64,0,0.04274666806062063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,2048,8,0,0.05470933516820272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,2048,4,0,0.09717866778373718
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,2048,2,0,0.19079466660817465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,2048,1,0,0.34721068541208905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,2048,16,0,0.04465599854787191
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,2048,8,0,0.054976001381874084
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,2048,64,0,0.04251199960708618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,2048,32,0,0.04305600126584371
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,2048,4,0,0.09718400239944458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,2048,2,0,0.19093332688013712
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,2048,1,0,0.3529013395309448
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,3072,32,0,0.059248000383377075
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,3072,16,0,0.06354666749636333
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,3072,8,0,0.10116799672444661
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,3072,64,0,0.058304001887639366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,3072,4,0,0.2020053267478943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,3072,2,0,0.36690131823221844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,3072,16,0,0.0631573349237442
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,3072,32,0,0.059392000238100685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,3072,8,0,0.1015786627928416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,3072,4,0,0.20638932784398398
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,3072,64,0,0.05748266478379568
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,3072,1,0,0.6890933513641357
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,3072,2,0,0.3650933504104614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,4096,16,0,0.08557333548863728
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,4096,32,0,0.07564266522725423
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,4096,8,0,0.15755200386047363
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,4096,4,0,0.32234134276707965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,4096,64,0,0.07361066838105519
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,4096,16,0,0.08506666620572408
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,4096,2,0,0.5923306544621786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,4096,8,0,0.15618133544921875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,3072,1,0,0.6998506387074789
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,4096,4,0,0.3263466755549113
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,4096,64,0,0.0732426643371582
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,4096,32,0,0.0758186678091685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,4096,2,0,0.5934826532999674
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,4096,1,0,1.1471467018127441
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,6144,16,0,0.1759679913520813
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,6144,8,0,0.3595893383026123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,6144,32,0,0.11169600486755371
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,6144,64,0,0.10481599966684978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,6144,4,0,0.6567840178807577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,4096,1,0,1.161621332168579
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,6144,16,0,0.1768853267033895
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,6144,8,0,0.3601813316345215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,6144,32,0,0.11308800180753072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,6144,2,0,1.2402133146921794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,6144,4,0,0.6444480021794637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,6144,64,0,0.1046560009320577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,8192,16,0,0.2802453239758809
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,6144,2,0,1.2240426540374756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,8192,8,0,0.5872693459192911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,8192,64,0,0.13885333140691122
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,6144,1,0,2.4274187088012695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,8192,32,0,0.14896532893180847
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,8192,4,0,1.0973280270894368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,6144,1,0,2.3997227350870767
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,8192,8,0,0.5979413191477457
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,8192,2,0,2.116997400919596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,8192,16,0,0.27885866165161133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,8192,4,0,1.079925298690796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,8192,32,0,0.14852799971898398
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,8192,64,0,0.13983466227849325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,10240,16,0,0.426202654838562
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,8192,2,0,2.0857812563578286
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,10240,8,0,0.8759573300679525
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,10240,32,0,0.23227200905481973
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,8192,1,0,4.249109268188477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,10240,64,0,0.17495467265446982
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,10240,4,0,1.6315733591715496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,10240,16,0,0.4188426733016968
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,10240,8,0,0.8795146942138672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,10240,32,0,0.2311306595802307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,10240,2,0,3.2530721028645835
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,10240,64,0,0.17482133706410727
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,10240,4,0,1.634149392445882
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,8192,1,0,4.090479850769043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,12288,16,0,0.6717120011647543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,10240,2,0,3.2175092697143555
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,12288,8,0,1.2200106779734294
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,12288,32,0,0.32180267572402954
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,12288,64,0,0.21049066384633383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,12288,4,0,2.3247092564900718
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,10240,1,0,6.912581125895183
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,12288,16,0,0.6831839879353842
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,12288,8,0,1.2315946420033772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,12288,32,0,0.32370134194691974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,10240,1,0,6.602906545003255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,12288,4,0,2.3646186192830405
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,12288,64,0,0.2082080046335856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,12288,2,0,4.767626762390137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,1,16384,16,0,1.1302186648050945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,12288,2,0,4.450981458028157
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,1,16384,8,0,2.07859738667806
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,1,16384,64,0,0.2738560040791829
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,1,16384,32,0,0.5241493384043375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,1,16384,4,0,4.359631856282552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,12288,1,0,9.71121088663737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,1,16384,16,0,1.1138986746470134
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,1,16384,8,0,2.109930674235026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,1,16384,64,0,0.27695999542872113
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,1,16384,32,0,0.5199786822001139
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,12288,1,0,9.682485580444336
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,1,16384,2,0,8.41154670715332
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,16,1,0,0.018378666291634243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,1,16384,4,0,4.002490679423015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,16,2,0,0.014639999717473984
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,16,4,0,0.014090667168299357
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,16,8,0,0.013274667163689932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,16,16,0,0.012986666212479273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,16,32,0,0.013034666577974955
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,16,64,0,0.012421333541472753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,16,1,0,0.01868266612291336
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,16,2,0,0.014362666755914688
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,16,4,0,0.013770667215188345
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,16,8,0,0.01313599944114685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,16,16,0,0.013077333569526672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,16,32,0,0.012826666235923767
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,16,64,0,0.014815999815861383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,32,1,0,0.018239999810854595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,32,2,0,0.014655999839305878
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,32,4,0,0.013999999811251959
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,32,8,0,0.03685333331425985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,32,16,0,0.013157332936922709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,1,16384,2,0,8.35595703125
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,32,32,0,0.012778667112191519
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,32,64,0,0.012479999413092932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,32,1,0,0.01826133330663045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,32,2,0,0.014901333798964819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,32,4,0,0.013983999689420065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,32,8,0,0.013605333864688873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,32,16,0,0.013072000195582708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,32,32,0,0.01301866645614306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,32,64,0,0.01360000049074491
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,64,1,0,0.020256000260512035
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,64,2,0,0.016330666840076447
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,64,4,0,0.014666666587193808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,64,8,0,0.014175999909639359
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,64,32,0,0.014831999937693277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,64,16,0,0.013872000078360239
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,64,64,0,0.013733333597580591
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,64,1,0,0.020037333170572918
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,64,2,0,0.01613333324591319
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,64,8,0,0.014442666123310724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,64,16,0,0.013877333452304205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,64,4,0,0.014661333213249842
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,64,32,0,0.013839999834696451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,64,64,0,0.013461332768201828
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,128,4,0,0.016271999726692837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,128,2,0,0.01870399961868922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,128,1,0,0.02698666602373123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,128,8,0,0.015471999843915304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,128,16,0,0.015610666324694952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,128,32,0,0.015029333531856537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,128,64,0,0.014837333311637243
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,128,2,0,0.018874666343132656
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,128,4,0,0.017909333109855652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,128,8,0,0.01581866666674614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,128,16,0,0.015322666615247726
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,128,1,0,0.027855999767780304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,128,32,0,0.015109332899252573
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,128,64,0,0.014874666929244995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,256,4,0,0.019098666807015736
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,256,2,0,0.02810666710138321
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,256,1,0,0.05190933247407278
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,1,16384,1,0,17.18080012003581
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,256,8,0,0.017743999759356182
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,256,32,0,0.016202667107184727
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,256,16,0,0.016789333273967106
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,256,64,0,0.01591466615597407
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,256,32,0,0.016458666572968166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,256,4,0,0.019093333433071773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,256,2,0,0.02796799937884013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,256,1,0,0.052015999952952065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,256,8,0,0.017423999806245167
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,256,64,0,0.01590399940808614
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,256,16,0,0.017242666333913803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,1,16384,1,0,16.878021240234375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,512,16,0,0.02111999938885371
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,512,32,0,0.020448000480731327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,512,8,0,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,512,4,0,0.03236799935499827
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,512,64,0,0.02012266715367635
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,512,2,0,0.05619733532269796
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,512,1,0,0.09313600262006123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,512,16,0,0.021082667013009388
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,512,8,0,0.022277332842350006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,512,32,0,0.020554666717847187
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,512,1,0,0.09170666337013245
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,512,4,0,0.032287999987602234
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,512,2,0,0.05620799958705902
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,512,64,0,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,1024,32,0,0.0286613330245018
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,1024,64,0,0.027935999135176342
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,1024,16,0,0.029882666965325672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,1024,8,0,0.04033066580692927
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,1024,4,0,0.06735999882221222
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,1024,2,0,0.12545599540074667
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,1024,16,0,0.02998399982849757
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,1024,32,0,0.028501334289709728
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,1024,8,0,0.04027199993530909
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,1024,1,0,0.22947200139363608
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,1024,64,0,0.027749332288901012
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,1024,4,0,0.06741333504517873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,1024,2,0,0.12469866871833801
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,1024,1,0,0.2305813431739807
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,1536,16,0,0.04067733387152354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,1536,32,0,0.037087999284267426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,1536,8,0,0.07242666681607564
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,1536,64,0,0.035887998839219414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,1536,2,0,0.22312533855438232
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,1536,4,0,0.1255466639995575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,1536,16,0,0.040778666734695435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,1536,8,0,0.06519466638565063
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,1536,4,0,0.12706666191418967
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,1536,32,0,0.03697066754102707
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,1536,64,0,0.03603200117746989
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,1536,2,0,0.22452799479166666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,1536,1,0,0.4207040071487427
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,2048,64,0,0.043935999274253845
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,2048,32,0,0.045466666420300804
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,1536,1,0,0.42692800362904865
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,2048,4,0,0.1912426749865214
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,2048,16,0,0.05606399973233541
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,2048,8,0,0.09815999865531921
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,2048,2,0,0.35236799716949463
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,2048,32,0,0.045370668172836304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,2048,8,0,0.09799466530481975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,2048,4,0,0.19310933351516724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,2048,64,0,0.04371733466784159
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,2048,16,0,0.05565333366394043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,2048,1,0,0.6784693400065104
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,2048,2,0,0.3532533248265584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,3072,8,0,0.2069173256556193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,3072,16,0,0.10290132959683736
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,3072,32,0,0.06542933483918507
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,3072,4,0,0.3689653476079305
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,3072,64,0,0.06055466830730438
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,2048,1,0,0.6733439763387045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,3072,32,0,0.06534400085608165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,3072,8,0,0.2093706727027893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,3072,2,0,0.6882932980855306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,3072,16,0,0.10439999898274739
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,3072,4,0,0.36634135246276855
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,3072,64,0,0.060602664947509766
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,4096,16,0,0.1591039995352427
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,3072,2,0,0.6842239697774252
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,3072,1,0,1.3476212819417317
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,4096,8,0,0.3290026585261027
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,4096,4,0,0.6071253220240275
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,4096,32,0,0.08749333024024963
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,4096,64,0,0.07904533545176189
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,4096,16,0,0.15971733132998148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,4096,2,0,1.1416320006052654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,4096,8,0,0.32551999886830646
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,3072,1,0,1.3673866589864094
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,4096,32,0,0.0881706674893697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,4096,64,0,0.07855466504891713
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,4096,4,0,0.6077920198440552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,4096,2,0,1.148645321528117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,6144,16,0,0.3718186616897583
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,6144,8,0,0.6495199998219808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,6144,32,0,0.17918932437896729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,6144,64,0,0.11737066507339478
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,4096,1,0,2.2917760213216147
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,6144,4,0,1.231280008951823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,6144,8,0,0.6587946812311808
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,4096,1,0,2.251973311106364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,6144,16,0,0.36352535088857013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,6144,64,0,0.11665599544843037
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,6144,4,0,1.250378688176473
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,6144,32,0,0.18129066626230875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,6144,2,0,2.4278666178385415
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,6144,2,0,2.3968213399251304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,8192,16,0,0.6008640130360922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,8192,8,0,1.1070666313171387
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,8192,64,0,0.15340266625086466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,8192,32,0,0.2834933400154114
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,8192,4,0,2.1225172678629556
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,6144,1,0,4.7351681391398115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,8192,8,0,1.1103893121083577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,8192,2,0,4.303466796875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,8192,16,0,0.589247981707255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,8192,32,0,0.28546667098999023
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,8192,4,0,2.1071680386861167
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,6144,1,0,5.013423919677734
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,8192,64,0,0.15290133158365884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,10240,16,0,0.8754666646321615
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,10240,8,0,1.6327892939249675
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,10240,32,0,0.4267093340555827
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,8192,2,0,4.3345333735148115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,10240,64,0,0.2381920019785563
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,10240,4,0,3.2370773951212564
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,10240,8,0,1.6515199343363445
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,8192,1,0,9.035605112711588
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,10240,4,0,3.2072372436523438
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,10240,16,0,0.8825813134511312
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,10240,64,0,0.23810132344563803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,10240,32,0,0.4315413236618042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,10240,2,0,6.662538528442383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,10240,2,0,6.722351710001628
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,12288,8,0,2.346293290456136
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,8192,1,0,8.767018636067709
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,12288,4,0,4.595946629842122
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,12288,32,0,0.6863199869791666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,12288,16,0,1.2398453553517659
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,12288,64,0,0.3330186605453491
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,10240,1,0,13.592768351236979
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,12288,8,0,2.3258560498555503
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,12288,2,0,9.735642751057943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,10240,1,0,13.616427103678385
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,12288,32,0,0.6907947063446045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,12288,64,0,0.33002666632334393
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,12288,16,0,1.2172053654988606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,12288,4,0,4.670826594034831
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,2,16384,16,0,2.0953547159830728
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,2,16384,8,0,3.949786822001139
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,2,16384,32,0,1.1404213110605876
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,2,16384,64,0,0.5290453433990479
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,12288,2,0,9.8210080464681
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,2,16384,4,0,8.394816080729166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,12288,1,0,19.31885274251302
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,2,16384,8,0,4.16755739847819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,2,16384,16,0,2.0820159912109375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,2,16384,32,0,1.12336532274882
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,2,16384,64,0,0.5299946864446005
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,2,16384,2,0,16.787546793619793
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,16,1,0,0.024847999215126038
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,2,16384,4,0,8.727877298990885
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,16,2,0,0.018191999445358913
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,16,8,0,0.013616000612576803
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,12288,1,0,19.543460845947266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,16,16,0,0.013669333110253016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,16,4,0,0.015050667027632395
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,16,32,0,0.012890666723251343
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,16,64,0,0.012549333274364471
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,16,1,0,0.025802666942278545
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,16,2,0,0.01844800015290578
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,16,8,0,0.013722666849692663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,16,16,0,0.013365333278973898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,16,4,0,0.01492799942692121
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,16,32,0,0.013077333569526672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,16,64,0,0.012901333471139273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,32,1,0,0.02513066679239273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,32,2,0,0.018160000443458557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,32,8,0,0.013781332721312841
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,32,4,0,0.014896000425020853
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,32,16,0,0.013679999858140945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,32,32,0,0.01333333303531011
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,32,64,0,0.012752000242471695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,32,1,0,0.025888000925381977
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,32,4,0,0.015125333021084467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,32,8,0,0.013978666315476099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,32,16,0,0.013584000368913015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,32,2,0,0.018005333840847015
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,32,32,0,0.013130666067202887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,32,64,0,0.01313599944114685
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,64,2,0,0.01995733380317688
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,64,1,0,0.029557332396507263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,64,8,0,0.014805333067973455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,64,16,0,0.01431999976436297
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,64,4,0,0.01621333385507266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,64,32,0,0.014197333405415217
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,64,64,0,0.013669333110253016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,64,1,0,0.029253333806991577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,64,2,0,0.020448000480731327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,64,8,0,0.014842666685581207
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,64,16,0,0.014245333770910898
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,64,4,0,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,64,32,0,0.014064000298579534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,64,64,0,0.013983999689420065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,128,2,0,0.027765333652496338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,2,16384,2,0,16.861092885335285
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,128,1,0,0.048512001832326256
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,128,8,0,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,128,16,0,0.01578666642308235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,128,4,0,0.018805333723624546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,128,64,0,0.015504000087579092
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,128,32,0,0.015610666324694952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,128,2,0,0.027658666173617046
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,128,4,0,0.019082666685183842
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,128,1,0,0.048528000712394714
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,128,16,0,0.015893333901961643
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,128,8,0,0.016496000190575916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,128,32,0,0.01562133307258288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,128,64,0,0.015034666905800501
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,256,2,0,0.051872000098228455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,256,4,0,0.029050665597120922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,256,16,0,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,256,32,0,0.01687466725707054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,256,1,0,0.07894933223724365
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,256,8,0,0.01939733326435089
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,256,64,0,0.01651200031240781
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,256,4,0,0.028543998797734577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,256,8,0,0.01964266722400983
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,256,2,0,0.05157333115736643
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,256,16,0,0.017935999979575474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,256,32,0,0.01687466725707054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,256,64,0,0.016255999604860943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,256,1,0,0.0786186655362447
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,512,8,0,0.032826667030652366
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,512,32,0,0.021253332495689392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,512,16,0,0.022805333137512207
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,512,2,0,0.09212799866994222
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,512,4,0,0.05686933298905691
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,512,64,0,0.020666666328907013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,2,16384,1,0,33.66868336995443
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,512,1,0,0.1702079971631368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,512,16,0,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,512,4,0,0.05690133571624756
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,512,8,0,0.03319466610749563
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,512,2,0,0.09259733557701111
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,512,32,0,0.02165333429972331
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,512,64,0,0.020703999946514767
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,1024,32,0,0.030634666482607525
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,1024,8,0,0.06950399776299794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,1024,16,0,0.04160533348719279
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,512,1,0,0.16897600889205933
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,1024,64,0,0.029370665550231934
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,1024,4,0,0.12556800246238708
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,1024,2,0,0.22822932402292886
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,1024,4,0,0.125791996717453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,1024,8,0,0.0699893335501353
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,1024,16,0,0.04155733436346054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,1024,32,0,0.030805334448814392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,1024,64,0,0.029114666084448498
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,1024,1,0,0.43509864807128906
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,1536,8,0,0.12772267063458762
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,1024,2,0,0.22729599475860596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,1536,16,0,0.06730666756629944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,1024,1,0,0.4413119951883952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,1536,4,0,0.226090669631958
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,1536,32,0,0.0433599998553594
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,1536,64,0,0.03809066613515218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,1536,2,0,0.41733864943186444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,1536,16,0,0.06684799989064534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,1536,8,0,0.12804266810417175
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,1536,4,0,0.22662933667500815
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,1536,64,0,0.03813866774241129
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,1536,32,0,0.0428959975639979
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,1536,2,0,0.42532801628112793
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,1536,1,0,0.8346186478932699
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,2048,32,0,0.058543999989827476
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,2048,16,0,0.10223999619483948
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,2048,8,0,0.19430933396021524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,2048,64,0,0.04900800188382467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,2,16384,1,0,33.682848612467446
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,1536,1,0,0.8198773066202799
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,2048,4,0,0.35170666376749676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,2048,2,0,0.6752906640370687
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,2048,16,0,0.10034132997194926
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,2048,8,0,0.19776000579198202
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,2048,64,0,0.048885335524876915
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,2048,32,0,0.057861333092053734
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,2048,4,0,0.355951984723409
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,3072,16,0,0.2137920061747233
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,3072,8,0,0.37032532691955566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,2048,2,0,0.6668639977773031
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,2048,1,0,1.319258689880371
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,3072,32,0,0.10884267091751099
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,3072,64,0,0.06963199873765309
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,3072,4,0,0.6985973517100016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,3072,16,0,0.21036799748738608
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,2048,1,0,1.345263957977295
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,3072,8,0,0.3686666488647461
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,3072,64,0,0.06951466699441274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,3072,32,0,0.10870933532714844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,3072,4,0,0.704207976659139
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,3072,2,0,1.364576021830241
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,4096,16,0,0.3346773386001587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,3072,2,0,1.3414613405863445
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,4096,8,0,0.6118826468785604
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,4096,32,0,0.16688533624013266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,4096,64,0,0.09339732925097148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,4096,4,0,1.15449062983195
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,3072,1,0,2.6613386472066245
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,4096,16,0,0.333957314491272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,4096,8,0,0.6048959891001383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,3072,1,0,2.6348212560017905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,4096,2,0,2.2842772801717124
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,4096,64,0,0.09298666318257649
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,4096,32,0,0.1651946703592936
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,4096,4,0,1.147056023279826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,6144,16,0,0.6590293248494467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,6144,8,0,1.2344266573588054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,6144,32,0,0.37144001324971515
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,4096,2,0,2.2798239390055337
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,6144,64,0,0.18794133265813193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,6144,4,0,2.4160426457722983
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,4096,1,0,4.748922665913899
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,6144,16,0,0.6584426561991373
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,6144,8,0,1.2431626319885254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,4096,1,0,4.5538028081258135
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,6144,32,0,0.37512000401814777
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,6144,64,0,0.18786666790644327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,6144,4,0,2.457109292348226
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,6144,2,0,4.806927998860677
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,4,8192,16,0,1.1187573273976643
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,4,8192,8,0,2.092463970184326
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,4,8192,32,0,0.6131946643193563
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,4,8192,64,0,0.2933013240496318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,6144,2,0,4.854149182637532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,4,8192,4,0,4.112725257873535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,4,8192,8,0,2.1084799766540527
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,4,8192,2,0,8.820789337158203
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,6144,1,0,10.205205281575521
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,4,8192,16,0,1.104085365931193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,4,8192,4,0,4.327413241068522
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,16,1,0,0.03677333394686381
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,16,2,0,0.024469333390394848
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,6144,1,0,10.260106404622396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,4,8192,64,0,0.29337600866953534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,16,4,0,0.01802666609485944
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,16,8,0,0.014954666296641031
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,16,16,0,0.013877333452304205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,16,32,0,0.013530666629473368
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,4,8192,32,0,0.602351983388265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,16,64,0,0.01303999995191892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,16,1,0,0.03709866603215536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,16,2,0,0.024469333390394848
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,16,4,0,0.018394666413466137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,16,16,0,0.0136266661187013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,16,8,0,0.014442666123310724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,16,32,0,0.013295999417702356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,16,64,0,0.013050666699806849
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,32,1,0,0.03958933303753535
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,32,2,0,0.024495999018351238
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,32,4,0,0.017898666361967724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,32,8,0,0.014671999961137772
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,32,16,0,0.01379199946920077
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,32,32,0,0.013503999759753546
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,32,64,0,0.013210666676362356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,32,2,0,0.024559999505678814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,32,1,0,0.0397173340121905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,32,4,0,0.017616000026464462
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,32,8,0,0.014650666465361914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,32,16,0,0.013866666704416275
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,32,32,0,0.013450667262077332
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,32,64,0,0.013338666409254074
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,4,8192,2,0,8.878927866617838
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,64,4,0,0.020303999384244282
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,64,8,0,0.016058667252461117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,64,1,0,0.04959466556708018
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,64,2,0,0.030031998952229817
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,64,32,0,0.014560000350077948
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,64,16,0,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,64,64,0,0.014069333672523499
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,64,8,0,0.015925332903862
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,64,2,0,0.03014933317899704
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,64,4,0,0.02014933278163274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,64,32,0,0.014314666390419006
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,64,16,0,0.01509333277742068
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,64,1,0,0.049786667029062905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,64,64,0,0.014096000542243322
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,128,16,0,0.01621866722901662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,128,8,0,0.019061333189407986
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,128,4,0,0.027642667293548584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,128,32,0,0.016074666132529575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,128,2,0,0.04816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,128,1,0,0.08292800188064575
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,128,64,0,0.015471999843915304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,128,2,0,0.048112000028292336
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,128,4,0,0.02846933404604594
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,128,16,0,0.016336000214020412
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,128,8,0,0.018757333358128864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,128,32,0,0.016042667130629223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,128,1,0,0.08404800295829773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,128,64,0,0.015493333339691162
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,256,8,0,0.029616000751654308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,256,16,0,0.019567999988794327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,256,4,0,0.05273066461086273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,256,32,0,0.01842133328318596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,256,2,0,0.07870399951934814
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,256,64,0,0.01758933315674464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,256,1,0,0.14255467057228088
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,256,4,0,0.05263466636339823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,256,8,0,0.029658667743206024
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,256,16,0,0.019621333728233974
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,256,2,0,0.07839466631412506
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,256,32,0,0.018506667266289394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,256,64,0,0.01810666670401891
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,256,1,0,0.14096533258756003
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,512,16,0,0.034927998979886375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,512,8,0,0.05971199770768484
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,512,4,0,0.09354133407274882
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,512,32,0,0.023258666197458904
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,512,64,0,0.022143999735514324
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,512,2,0,0.17015467087427774
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,4,8192,1,0,18.043354034423828
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,512,8,0,0.05835733314355215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,512,16,0,0.034517332911491394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,512,4,0,0.09330133597056071
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,512,1,0,0.31968533992767334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,512,64,0,0.02203733225663503
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,512,32,0,0.023775999744733173
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,512,2,0,0.16901866594950357
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,1024,32,0,0.04417066772778829
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,1024,16,0,0.07296533385912578
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,1024,8,0,0.12896000345547995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,512,1,0,0.3208906650543213
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,1024,64,0,0.0341333324710528
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,1024,4,0,0.22841066122055054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,1024,16,0,0.07276266813278198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,1024,8,0,0.12955733140309653
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,1024,4,0,0.2290453314781189
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,1024,2,0,0.4330666859944661
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,1024,64,0,0.03389866650104523
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,1024,32,0,0.0441599984963735
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,4,8192,1,0,18.046836853027344
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,1536,8,0,0.22778666019439697
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,1024,2,0,0.4347093502680461
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,1536,16,0,0.13500266273816428
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,1536,64,0,0.0469706654548645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,1536,32,0,0.07169066866238911
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,1536,4,0,0.4240373373031616
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,1024,1,0,0.8672479788462321
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,1024,1,0,0.8701439698537191
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,1536,16,0,0.13387200236320496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,1536,8,0,0.2302186687787374
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,1536,32,0,0.07246933380762736
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,1536,64,0,0.047151997685432434
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,1536,2,0,0.8199146588643392
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,1536,4,0,0.4204213221867879
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,2048,16,0,0.20158400138219199
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,2048,8,0,0.3576853275299072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,2048,32,0,0.10656000177065532
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,2048,64,0,0.06390400230884552
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,1536,2,0,0.8242239952087402
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,2048,4,0,0.6856053670247396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,1536,1,0,1.6186347007751465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,2048,16,0,0.2015893260637919
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,2048,2,0,1.3413066864013672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,2048,8,0,0.36022400856018066
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,1536,1,0,1.6496853828430176
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,2048,32,0,0.10668266812960307
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,2048,4,0,0.6799893379211426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,2048,64,0,0.06444799900054932
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,3072,16,0,0.38155198097229004
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,2048,2,0,1.3199573357899983
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,3072,8,0,0.7065440018971761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,3072,32,0,0.2226240038871765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,3072,64,0,0.11678933103879292
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,3072,4,0,1.3529225985209148
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,2048,1,0,2.6398293177286782
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,3072,8,0,0.7003200054168701
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,3072,16,0,0.37823466459910077
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,2048,1,0,2.6212426821390786
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,3072,4,0,1.3537920316060383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,3072,32,0,0.21845332781473795
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,3072,2,0,2.7289759318033853
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,3072,64,0,0.11637333035469055
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,8,4096,16,0,0.6110026836395264
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,3072,2,0,2.7493225733439126
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,8,4096,8,0,1.1790293057759602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,8,4096,32,0,0.34124799569447833
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,8,4096,64,0,0.17804267009099325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,8,4096,4,0,2.2528907457987466
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,8,4096,8,0,1.1565173467000325
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,3072,1,0,5.590634663899739
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,8,4096,4,0,2.2968479792277017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,8,4096,2,0,4.642416000366211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,3072,1,0,5.4040482838948565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,8,4096,16,0,0.6131306489308676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,16,2,0,0.03566399961709976
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,8,4096,64,0,0.17813867330551147
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,16,16,1,0,0.061621333161989846
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,16,4,0,0.023898666103680927
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,8,4096,32,0,0.3492586612701416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,16,8,0,0.01735466718673706
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,16,16,0,0.015098666151364645
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,16,32,0,0.014533333480358124
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,16,64,0,0.01332266628742218
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,16,16,1,0,0.06028266747792562
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,16,4,0,0.024319998919963837
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,16,2,0,0.0355679988861084
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,16,8,0,0.01746133342385292
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,16,16,0,0.014458666245142618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,16,32,0,0.013642666240533194
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,16,64,0,0.013189333180586496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,32,4,0,0.02458133300145467
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,16,32,1,0,0.0666720022757848
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,32,2,0,0.039690665900707245
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,32,8,0,0.017792000124851864
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,32,16,0,0.014938666174809137
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,32,32,0,0.013786666095256805
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,32,64,0,0.0136266661187013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,32,4,0,0.024421334266662598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,32,2,0,0.03944533318281174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,32,8,0,0.017605333278576534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,32,16,0,0.01452800010641416
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,16,32,1,0,0.06739733119805653
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,32,32,0,0.01403733342885971
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,32,64,0,0.013514666507641474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,64,4,0,0.03018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,64,8,0,0.020224000016848247
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,64,16,0,0.016229332735141117
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,64,2,0,0.0499839981396993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,16,64,1,0,0.08499733606974284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,64,32,0,0.014997333288192749
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,64,64,0,0.016837333639462788
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,64,4,0,0.0301706666747729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,8,4096,2,0,4.648543993631999
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,64,2,0,0.049312000473340355
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,64,8,0,0.019930666933457058
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,64,32,0,0.01481066644191742
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,64,16,0,0.016650666793187458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,16,64,1,0,0.08489066362380981
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,64,64,0,0.014688000082969666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,128,32,0,0.017008000363906223
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,128,16,0,0.019189332922299702
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,128,8,0,0.029018667836983997
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,128,2,0,0.08329600095748901
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,128,4,0,0.04923733572165171
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,128,64,0,0.01695999999841054
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,128,8,0,0.028959999481836956
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,16,128,1,0,0.15392000476519266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,128,4,0,0.04879466692606608
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,128,16,0,0.01923199991385142
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,128,2,0,0.08689066767692566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,128,64,0,0.016672000288963318
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,128,32,0,0.01717866708834966
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,16,128,1,0,0.15500266353289285
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,256,16,0,0.03145066648721695
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,256,8,0,0.054245332876841225
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,256,4,0,0.07986133297284444
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,256,32,0,0.02042666698495547
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,256,64,0,0.019205333044131596
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,256,2,0,0.14095999797185263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,256,8,0,0.05379733443260193
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,256,16,0,0.0317546675602595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,256,4,0,0.07993599772453308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,8,4096,1,0,9.704405466715494
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,16,256,1,0,0.26733867327372235
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,256,32,0,0.02070933332045873
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,256,64,0,0.019013332823912304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,256,2,0,0.1421280006567637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,512,32,0,0.03738133360942205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,512,16,0,0.06320533156394958
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,512,8,0,0.09692266583442688
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,16,256,1,0,0.26791467269261676
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,512,64,0,0.02625600000222524
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,512,4,0,0.1709173321723938
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,512,16,0,0.06353599826494853
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,512,8,0,0.09750933448473613
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,512,2,0,0.3158613244692485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,512,4,0,0.17117865880330405
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,512,64,0,0.026506667335828144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,512,32,0,0.0373279998699824
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,512,2,0,0.31772265831629437
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,8,4096,1,0,9.870037078857422
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,1024,32,0,0.07954666515191396
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,1024,8,0,0.23597866296768188
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,1024,16,0,0.13594133655230203
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,1024,64,0,0.05054399867852529
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,16,512,1,0,0.6304159959157308
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,16,512,1,0,0.6224639813105265
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,1024,4,0,0.4448853333791097
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,1024,32,0,0.07852800190448761
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,1024,16,0,0.13563733299573263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,1024,8,0,0.23804267247517905
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,1024,64,0,0.0503359983364741
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,1024,2,0,0.8560160001118978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,1024,4,0,0.44497064749399823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,1536,16,0,0.24064532915751138
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,1024,2,0,0.850495974222819
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,1536,8,0,0.4338506857554118
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,1536,32,0,0.1423679987589518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,1536,64,0,0.0805920014778773
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,1536,4,0,0.8405546347300211
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,16,1024,1,0,1.6904373168945312
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,1536,8,0,0.437173326810201
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,1536,16,0,0.236735999584198
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,16,1024,1,0,1.6904959678649902
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,1536,64,0,0.0823520024617513
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,1536,4,0,0.8255146344502767
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,1536,32,0,0.14270933469136557
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,1536,2,0,1.6489440600077312
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,16,2048,16,0,0.37299199899037677
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,1536,2,0,1.6496799786885579
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,16,2048,8,0,0.6987466812133789
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,16,2048,64,0,0.12135466933250427
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,16,2048,32,0,0.2132533391316732
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,16,2048,4,0,1.3282240231831868
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,16,1536,1,0,3.258591969807943
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,16,2048,8,0,0.6870613098144531
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,16,2048,16,0,0.3670773506164551
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,16,2048,32,0,0.2111306587855021
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,16,2048,2,0,2.675701459248861
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,32,16,1,0,0.1067039966583252
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,16,2048,64,0,0.11727466185887654
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,16,2,0,0.06147199869155884
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,16,2048,4,0,1.3477813402811687
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,16,4,0,0.03536533315976461
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,16,8,0,0.023402666052182514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,16,1536,1,0,3.285242716471354
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,16,16,0,0.017418666432301205
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,16,32,0,0.014645333091417948
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,16,64,0,0.013754667093356451
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,32,16,1,0,0.10713066657384236
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,16,2,0,0.06159466505050659
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,16,4,0,0.03512533257404963
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,16,8,0,0.023152001202106476
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,16,16,0,0.017573333034912746
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,16,32,0,0.014538666854302088
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,16,64,0,0.0138026662170887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,32,4,0,0.03985599925120672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,32,2,0,0.06697600086530049
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,32,8,0,0.024288001159826916
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,32,16,0,0.01812800019979477
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,32,32,1,0,0.12271466851234436
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,32,32,0,0.014805333067973455
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,32,64,0,0.014186666657527288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,32,2,0,0.06768533090750377
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,32,4,0,0.03944533318281174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,32,8,0,0.02430933217207591
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,32,16,0,0.01801066721479098
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,32,32,1,0,0.12325867017110188
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,32,32,0,0.015072000523408255
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,32,64,0,0.013994666437307993
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,16,2048,2,0,2.6801973978678384
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,64,4,0,0.049642667174339294
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,64,8,0,0.03141333411137263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,64,16,0,0.020661332954963047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,64,32,0,0.016927999754746754
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,64,2,0,0.08520000179608662
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,64,64,0,0.015402667224407196
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,32,64,1,0,0.15677866339683533
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,64,8,0,0.030997333427270252
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,64,4,0,0.050144001841545105
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,64,16,0,0.020554666717847187
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,64,2,0,0.08570667107899983
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,64,32,0,0.016741332908471424
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,64,64,0,0.015354666858911514
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,32,64,1,0,0.1569546659787496
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,128,8,0,0.05007466673851013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,128,16,0,0.030954666435718536
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,128,32,0,0.02041600023706754
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,128,4,0,0.08589333295822144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,128,64,0,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,128,2,0,0.15316266814867655
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,128,8,0,0.04969066878159841
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,128,16,0,0.03120533376932144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,128,4,0,0.0860693355401357
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,128,64,0,0.018229333062966663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,32,128,1,0,0.30401066939036053
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,128,32,0,0.020448000480731327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,128,2,0,0.15331199765205383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,256,16,0,0.0569706658522288
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,256,8,0,0.08338133494059245
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,16,2048,1,0,5.658122380574544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,32,128,1,0,0.29319467147191364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,256,32,0,0.034847999612490334
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,256,4,0,0.14346133669217428
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,256,64,0,0.02364266663789749
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,256,16,0,0.058176000912984215
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,256,8,0,0.08331733445326488
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,256,2,0,0.2632799943288167
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,256,32,0,0.03498133271932602
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,256,4,0,0.1437013347943624
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,256,64,0,0.0239680012067159
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,256,2,0,0.2647413412729899
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,32,256,1,0,0.5154720147450765
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,512,8,0,0.1777706742286682
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,512,64,0,0.044346665342648826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,512,32,0,0.07012266914049785
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,512,16,0,0.10402133067448933
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,512,4,0,0.32386134068171185
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,32,256,1,0,0.5167520046234131
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,16,2048,1,0,5.74342409769694
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,512,16,0,0.10371200243631999
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,512,8,0,0.17692800362904867
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,512,32,0,0.07086933155854543
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,512,2,0,0.6175893147786459
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,512,64,0,0.04411733150482178
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,512,4,0,0.3251466751098633
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,32,1024,16,0,0.24898133675257364
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,32,1024,8,0,0.44940801461537677
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,512,2,0,0.6150559981664022
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,32,1024,64,0,0.090037335952123
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,32,1024,32,0,0.14671466747919717
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,32,1024,4,0,0.8605813185373942
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,32,512,1,0,1.2481173674265544
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,32,512,1,0,1.2655839920043945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,32,1024,8,0,0.44885865847269696
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,32,1024,32,0,0.14698132872581482
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,32,1024,16,0,0.245904008547465
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,32,1024,64,0,0.08989866574605306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,64,16,1,0,0.23674132426579794
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,16,2,0,0.10705066720644633
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,16,4,0,0.06063466767470042
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,16,16,0,0.023765332996845245
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,16,8,0,0.03637866675853729
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,32,1024,4,0,0.8747039635976156
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,16,32,0,0.017551999539136887
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,16,64,0,0.014645333091417948
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,32,1024,2,0,1.7324533462524414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,16,4,0,0.06118933359781901
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,16,2,0,0.1106773316860199
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,16,8,0,0.035071998834609985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,64,16,1,0,0.23115734259287515
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,16,32,0,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,16,16,0,0.02399466683467229
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,16,64,0,0.01463466634353002
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,32,16,0,0.02518400053183238
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,32,8,0,0.04018666595220566
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,32,2,0,0.12392000357309978
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,32,4,0,0.06878933310508728
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,32,32,0,0.018101333330074947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,32,64,0,0.015285332997639975
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,64,32,1,0,0.23907732963562012
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,32,8,0,0.04009066770474116
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,32,2,0,0.12225600083669026
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,32,4,0,0.06805333495140076
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,32,16,0,0.02476266771554947
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,32,32,0,0.018426666657129925
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,32,64,0,0.015237333873907724
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,64,32,1,0,0.24257065852483115
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,32,1024,2,0,1.7036159833272297
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,64,16,0,0.0329120010137558
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,64,32,0,0.02179199953873952
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,64,8,0,0.05169600248336792
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,64,4,0,0.08715732892354329
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,64,64,0,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,64,8,0,0.0514933317899704
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,64,4,0,0.08737599849700928
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,64,2,0,0.15702933073043823
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,64,2,0,0.1577173372109731
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,64,32,0,0.021557333568731945
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,64,16,0,0.03327466547489166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,64,64,1,0,0.30137066046396893
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,64,64,0,0.017797333498795826
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,64,64,1,0,0.3004053235054016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,128,16,0,0.05282133320967356
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,128,8,0,0.08944533268610637
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,128,32,0,0.03454933315515518
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,128,4,0,0.1607200006643931
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,128,64,0,0.0234400009115537
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,128,2,0,0.2943146626154582
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,128,4,0,0.16158933440844217
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,128,8,0,0.08885866403579712
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,128,16,0,0.05411200225353241
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,128,32,0,0.03453333427508672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,128,64,0,0.02332266668478648
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,128,2,0,0.28749332825342816
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,64,128,1,0,0.571125348409017
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,32,1024,1,0,3.4319305419921875
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,256,16,0,0.08878933389981587
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,256,8,0,0.14825066924095154
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,32,1024,1,0,3.359482765197754
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,256,32,0,0.06553600231806438
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,256,64,0,0.04233600199222565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,256,4,0,0.26656534274419147
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,64,128,1,0,0.5716000000635783
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,256,32,0,0.06461866696675618
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,256,16,0,0.08957866827646892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,256,8,0,0.14797332882881165
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,256,64,0,0.04227200150489807
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,256,2,0,0.5099680026372274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,256,4,0,0.2674506704012553
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,64,512,16,0,0.18797866503397623
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,256,2,0,0.5076479911804199
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,64,512,8,0,0.3362293243408203
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,64,512,32,0,0.11425066987673442
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,64,512,64,0,0.08284799754619598
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,64,256,1,0,1.0223413308461506
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,64,512,4,0,0.6356319983800253
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,64,256,1,0,1.0077226956685383
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,64,512,8,0,0.33685866991678876
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,64,512,16,0,0.1896053353945414
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,64,512,32,0,0.11755200227101643
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,64,512,64,0,0.08247999846935272
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,64,512,4,0,0.6251093149185181
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,128,16,4,0,0.11134399970372517
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,128,16,2,0,0.22685867547988892
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,128,16,8,0,0.061759998401006065
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,64,512,2,0,1.235642671585083
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,128,16,16,0,0.03600533306598663
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,128,16,1,0,0.46164266268412274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,128,16,32,0,0.023760000864664715
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,128,16,64,0,0.017909333109855652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,128,16,8,0,0.06190933287143707
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,128,16,4,0,0.10820800065994263
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,128,16,16,0,0.035818666219711304
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,128,16,32,0,0.023717333873112995
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,128,16,2,0,0.23345067103703818
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,128,16,64,0,0.01798933371901512
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,128,16,1,0,0.46140265464782715
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,128,32,8,0,0.07025599976380666
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,128,32,16,0,0.04233066737651825
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,128,32,4,0,0.12572266658147177
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,128,32,32,0,0.02624533325433731
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,128,32,64,0,0.019253333409627277
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,128,32,2,0,0.24067733685175577
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,128,32,8,0,0.0694400022427241
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,128,32,1,0,0.4822186628977458
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,64,512,2,0,1.244693358739217
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,128,32,4,0,0.12403733531634013
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,128,32,16,0,0.042165334026018776
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,128,32,32,0,0.025637333591779072
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,128,32,64,0,0.01937066639463107
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,128,32,2,0,0.2402613361676534
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,128,32,1,0,0.49578134218851727
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,128,64,16,0,0.0551093320051829
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,128,64,8,0,0.09060266613960266
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,128,64,32,0,0.03696000079313914
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,128,64,64,0,0.024314666787783306
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,128,64,4,0,0.15913066267967224
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,128,64,2,0,0.2993813355763753
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,128,64,8,0,0.09014399846394856
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,128,64,4,0,0.15941333770751953
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,128,64,16,0,0.05497066676616669
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,128,64,32,0,0.036117332677046456
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,128,64,64,0,0.024117333193620045
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,128,64,2,0,0.30187199513117474
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,128,64,1,0,0.583295981089274
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,128,128,16,0,0.09521599610646565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,128,128,8,0,0.1627679963906606
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,128,64,1,0,0.5859839916229248
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,64,512,1,0,2.4800853729248047
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,128,128,64,0,0.04218666752179464
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,128,128,32,0,0.06057066718737284
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,128,128,4,0,0.2989120086034139
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,64,512,1,0,2.538431962331136
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,128,128,8,0,0.16107733050982156
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,128,128,16,0,0.09517332911491394
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,128,128,2,0,0.555295983950297
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,128,128,64,0,0.04276266694068909
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,128,128,32,0,0.0621973325808843
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,128,128,4,0,0.3070453405380249
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,128,256,16,0,0.16085333625475565
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,128,128,2,0,0.5558559894561768
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,128,256,8,0,0.277946670850118
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,128,256,64,0,0.07714133461316426
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,128,256,32,0,0.10107200344403584
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,128,128,1,0,1.1138933499654133
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,128,256,4,0,0.5167093276977539
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,128,128,1,0,1.1221760114034016
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,128,256,16,0,0.16004799803098044
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,128,256,8,0,0.2850773334503174
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,128,256,64,0,0.0763679991165797
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,128,256,32,0,0.10444800059000652
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,256,16,4,0,0.2312800089518229
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,256,16,2,0,0.46114134788513184
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,128,256,2,0,1.0093226432800293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,128,256,4,0,0.5181386470794678
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,256,16,8,0,0.11032533645629883
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,256,16,16,0,0.06374933322270711
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,256,16,32,0,0.03694933404525121
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,256,16,64,0,0.02515200028816859
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,256,16,1,0,0.9259520371754965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,256,16,8,0,0.11008000373840332
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,256,16,16,0,0.0632533331712087
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,256,16,4,0,0.23739200830459595
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,256,16,64,0,0.02499199906984965
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,256,16,32,0,0.03702399879693985
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,256,16,2,0,0.4647093216578166
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,256,16,1,0,0.9232906500498453
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,256,32,8,0,0.12610133488972983
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,256,32,4,0,0.24624532461166382
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,256,32,32,0,0.045466666420300804
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,256,32,16,0,0.07379200061162312
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,256,32,64,0,0.029365333418051403
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,256,32,2,0,0.4896693229675293
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,128,256,2,0,1.0016427040100098
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,256,32,16,0,0.07401599983374278
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,256,32,8,0,0.1260373294353485
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,256,32,4,0,0.24836800495783487
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,256,32,32,0,0.0443146675825119
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,256,32,64,0,0.029919999341169994
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,256,32,2,0,0.4859466552734375
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,256,32,1,0,0.9777812957763672
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,256,64,16,0,0.0960053304831187
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,256,64,8,0,0.1653493344783783
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,256,64,32,0,0.06218666831652323
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,256,32,1,0,0.9773706595102946
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,256,64,64,0,0.043381333351135254
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,256,64,4,0,0.3011893431345622
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,256,64,8,0,0.16597867012023926
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,256,64,2,0,0.5824426809946696
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,128,256,1,0,2.0221813519795737
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,256,64,4,0,0.3016693393389384
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,256,64,16,0,0.09686400492986043
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,128,256,1,0,2.0657706260681152
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,256,64,32,0,0.06257066627343495
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,256,64,64,0,0.044010668992996216
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,256,64,2,0,0.5825226704279581
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,8,256,128,16,0,0.17241599162419638
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,4,256,128,32,0,0.1079306701819102
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,16,256,128,8,0,0.3006880084673564
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,2,256,128,64,0,0.07339199880758922
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,256,64,1,0,1.1778933207194011
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,32,256,128,4,0,0.5967679818471273
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,16,256,128,8,0,0.30849067370096844
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,256,64,1,0,1.1501333713531494
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,8,256,128,16,0,0.17612799008687338
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,4,256,128,32,0,0.10855467120806377
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,2,256,128,64,0,0.07660800218582153
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,32,256,128,4,0,0.5830239852269491
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,64,256,128,2,0,1.0997386773427327
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,64,256,128,2,0,1.103216012318929
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,fp8,128,256,128,1,0,2.245759963989258
TRTLLM,1.0.0rc3,NVIDIA H200,mla_context,default,float16,float16,128,256,128,1,0,2.247477372487386
