framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,16,1,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,16,2,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,16,4,0,0.01545599972208341
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,16,16,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,16,8,0,0.014917333920796713
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,16,32,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,16,2,0,0.01575999955336253
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,16,4,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,16,8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,16,16,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,16,1,0,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,16,64,0,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,16,32,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,16,64,0,0.016629333297411602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,32,1,0,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,32,2,0,0.014853333433469137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,32,4,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,32,8,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,32,16,0,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,32,32,0,0.014778666198253632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,32,64,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,32,1,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,32,2,0,0.01646399994691213
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,32,4,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,32,8,0,0.016783999900023144
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,32,16,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,32,32,0,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,32,64,0,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,64,2,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,64,1,0,0.01701333373785019
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,64,4,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,64,8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,64,16,0,0.017125333348910015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,64,64,0,0.014848000059525171
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,64,1,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,64,2,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,64,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,64,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,64,16,0,0.015872000406185787
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,64,32,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,64,64,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,128,4,0,0.01701333373785019
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,128,1,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,128,2,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,128,8,0,0.017279999951521557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,128,16,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,128,32,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,128,64,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,128,1,0,0.018863999595244724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,128,2,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,128,4,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,128,8,0,0.016773333152135212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,128,16,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,128,32,0,0.017658667018016178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,128,64,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,256,1,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,256,2,0,0.021045332153638203
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,256,4,0,0.020725333442290623
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,256,8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,256,16,0,0.02065066620707512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,256,32,0,0.020762667059898376
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,256,64,0,0.02073066681623459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,256,1,0,0.022810667753219604
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,256,2,0,0.020768000433842342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,256,8,0,0.02091199904680252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,256,4,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,256,16,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,256,64,0,0.021045332153638203
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,256,32,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,512,1,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,512,4,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,512,8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,512,16,0,0.02480533222357432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,512,32,0,0.02481066683928172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,512,64,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,512,1,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,512,2,0,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,512,4,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,512,16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,512,8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,512,32,0,0.022805333137512207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,1024,2,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,1024,4,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,1024,1,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,1024,8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,1024,16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,1024,32,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,1024,64,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,1024,2,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,1024,1,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,1024,4,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,1024,8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,1024,16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,1024,32,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,1024,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,1536,2,0,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,1536,4,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,1536,1,0,0.09591999650001526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,1536,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,1536,16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,1536,8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,1536,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,1536,2,0,0.05904533465703329
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,1536,4,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,1536,1,0,0.09318400422732036
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,1536,8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,1536,16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,1536,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,1536,64,0,0.03514666606982549
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,2048,2,0,0.08088533580303192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,2048,4,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,2048,1,0,0.13329600294431052
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,2048,16,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,2048,8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,2048,32,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,2048,64,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,2048,2,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,2048,4,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,2048,1,0,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,2048,8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,2048,16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,2048,32,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,2048,64,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,3072,4,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,3072,2,0,0.1346506675084432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,3072,8,0,0.060085331400235496
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,3072,1,0,0.22835199038187662
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,3072,16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,3072,32,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,3072,64,0,0.05563200016816457
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,3072,4,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,3072,2,0,0.1384106675783793
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,3072,1,0,0.2259626587231954
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,3072,16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,3072,8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,3072,32,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,3072,64,0,0.05563200016816457
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,4096,4,0,0.1307253340880076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,4096,2,0,0.20138667027155557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,4096,8,0,0.07645333309968312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,4096,1,0,0.34969600041707355
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,4096,16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,4096,32,0,0.0699786643187205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,4096,64,0,0.06826133529345195
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,4096,4,0,0.12902933359146118
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,4096,2,0,0.20565332969029745
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,4096,16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,4096,8,0,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,4096,1,0,0.34833065668741864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,4096,32,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,4096,64,0,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,6144,4,0,0.22459733486175537
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,6144,2,0,0.3729013204574585
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,6144,8,0,0.13500266273816428
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,6144,16,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,6144,32,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,6144,1,0,0.6594560146331787
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,6144,64,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,6144,4,0,0.2259626587231954
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,6144,2,0,0.36848000685373944
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,6144,8,0,0.13687466581662497
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,6144,32,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,6144,16,0,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,6144,1,0,0.66594131787618
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,6144,64,0,0.09557333588600159
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,8192,4,0,0.3421866496404012
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,8192,8,0,0.21777600049972534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,8192,2,0,0.5864053169886271
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,8192,32,0,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,8192,16,0,0.13397333025932312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,8192,64,0,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,8192,1,0,1.0654719670613606
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,8192,4,0,0.33740798632303876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,8192,2,0,0.5874186754226685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,8192,8,0,0.21606399615605673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,8192,32,0,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,8192,16,0,0.13260799646377563
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,8192,1,0,1.082202672958374
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,8192,64,0,0.12117866675059001
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,10240,4,0,0.47598934173583984
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,10240,2,0,0.851967970530192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,10240,8,0,0.2978079915046692
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,10240,16,0,0.1763040026028951
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,10240,32,0,0.149317334095637
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,10240,64,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,10240,1,0,1.5580159823099773
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,10240,4,0,0.4804266691207886
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,10240,2,0,0.8290987014770508
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,10240,8,0,0.2998560070991516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,10240,16,0,0.17493333419164023
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,10240,32,0,0.15240533153216043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,10240,64,0,0.14779733618100485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,10240,1,0,1.528831958770752
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,12288,4,0,0.6377813418706259
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,12288,2,0,1.1521706581115723
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,12288,8,0,0.38416532675425213
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,12288,16,0,0.2409813404083252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,12288,32,0,0.18175999323527017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,12288,64,0,0.17356799046198526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,12288,1,0,2.1428960164388022
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,12288,4,0,0.6449546813964844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,12288,2,0,1.156608025232951
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,12288,16,0,0.2409813404083252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,12288,8,0,0.3872426748275757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,12288,32,0,0.1807360053062439
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,12288,64,0,0.17220266660054526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,12288,1,0,2.1261653900146484
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,16384,4,0,1.037823994954427
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,16384,2,0,1.9525973002115886
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,16384,8,0,0.5952853361765543
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,16384,16,0,0.37768534819285077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,16384,32,0,0.25702933470408124
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,512,2,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,1,16384,64,0,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,16384,1,0,3.8046773274739585
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,16384,4,0,1.0466986497243245
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,16384,8,0,0.6038186550140381
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,16384,16,0,0.37837334473927814
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,16384,2,0,1.9297280311584473
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,16384,32,0,0.2604373296101888
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,1,16384,64,0,0.22901866833368936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,16,1,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,16,2,0,0.0164533331990242
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,16,4,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,16,8,0,0.014597332725922266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,16,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,16,1,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,16,32,0,0.014853333433469137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,16,64,0,0.014602666099866232
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,16,2,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,16,8,0,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,16,4,0,0.01611199975013733
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,16,16,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,16,32,0,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,16,64,0,0.01461333284775416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,32,1,0,0.016943999876578648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,32,2,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,32,4,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,16384,1,0,3.560789426167806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,32,8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,32,16,0,0.015189333508412043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,32,32,0,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,32,64,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,32,1,0,0.017125333348910015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,32,2,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,32,4,0,0.016544000556071598
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,32,8,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,32,16,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,32,32,0,0.014746667196353277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,32,64,0,0.014602666099866232
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,64,2,0,0.016943999876578648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,64,1,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,64,8,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,64,4,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,64,16,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,64,64,0,0.01562133307258288
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,64,1,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,64,2,0,0.016688000410795212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,64,4,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,64,8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,64,32,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,64,16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,64,64,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,128,2,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,128,4,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,128,1,0,0.019061333189407986
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,128,8,0,0.017653333644072216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,128,16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,128,32,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,128,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,128,1,0,0.019402666638294857
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,128,2,0,0.018858666221300762
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,128,4,0,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,128,8,0,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,128,16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,128,32,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,128,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,256,1,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,256,2,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,256,4,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,256,8,0,0.020736000190178554
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,256,16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,256,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,256,1,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,256,4,0,0.021055998901526134
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,256,2,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,256,16,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,256,8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,256,32,0,0.020768000433842342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,256,64,0,0.020725333442290623
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,512,2,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,512,4,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,512,16,0,0.024826665719350178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,512,8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,512,64,0,0.02481066683928172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,512,1,0,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,512,2,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,512,8,0,0.02480533222357432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,512,4,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,512,16,0,0.024800000091393787
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,512,32,0,0.023152001202106476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,1024,2,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,1024,1,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,1024,4,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,1024,8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,1024,16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,1024,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,1024,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,1024,2,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,1024,1,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,1024,4,0,0.0365280012289683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,1024,8,0,0.032773333291212715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,1024,16,0,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,1024,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,1024,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,1536,2,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,1536,1,0,0.15769066413243613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,1536,4,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,1536,8,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,1536,16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,1536,32,0,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,1536,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,1536,2,0,0.0962559978167216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,1536,1,0,0.15871999661127725
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,1536,4,0,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,1536,8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,1536,16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,1536,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,1536,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,2048,2,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,2048,1,0,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,2048,4,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,2048,8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,2048,16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,2048,32,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,2048,64,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,2048,2,0,0.13243732849756876
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,2048,1,0,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,2048,4,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,2048,8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,2048,16,0,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,2048,32,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,2048,64,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,3072,2,0,0.22971733411153158
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,3072,4,0,0.1378933290640513
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,3072,1,0,0.41369601090749103
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,3072,16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,3072,8,0,0.08499200145403545
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,3072,32,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,3072,64,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,3072,2,0,0.22801067431767783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,3072,4,0,0.13635733723640442
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,3072,1,0,0.41335467497507733
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,3072,16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,3072,8,0,0.08499200145403545
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,3072,32,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,3072,64,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,4096,2,0,0.35140268007914227
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,4096,4,0,0.20497065782546997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,4096,1,0,0.6456319888432821
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,4096,16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,4096,8,0,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,4096,32,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,4096,64,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,4096,2,0,0.3510613441467285
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,4096,4,0,0.20497065782546997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,4096,1,0,0.6463093360265096
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,4096,16,0,0.07747200131416321
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,4096,8,0,0.130730668703715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,4096,32,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,6144,1,0,1.2598666350046794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,6144,2,0,0.6703733603159586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,6144,4,0,0.37461332480112713
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,6144,8,0,0.22732800245285034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,6144,16,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,6144,32,0,0.10171733299891154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,6144,64,0,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,512,1,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,6144,2,0,0.6604746580123901
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,6144,4,0,0.37666134039560956
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,6144,8,0,0.22528000672658285
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,6144,16,0,0.13943466544151306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,6144,1,0,1.2608746687571208
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,6144,32,0,0.10274666547775269
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,6144,64,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,8192,4,0,0.5966506799062093
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,8192,8,0,0.3449173370997111
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,8192,2,0,1.0415786902109783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,8192,32,0,0.14045866330464682
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,8192,16,0,0.2198186715443929
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,8192,64,0,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,8192,1,0,2.0210347175598145
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,8192,4,0,0.5932480096817017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,8192,2,0,1.0525013605753581
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,8192,16,0,0.21811199188232422
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,8192,8,0,0.34457600116729736
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,8192,32,0,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,8192,64,0,0.12834133704503378
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,8192,1,0,2.0363893508911133
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,10240,4,0,0.8536746501922607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,10240,2,0,1.567237377166748
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,10240,8,0,0.4889599879582723
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,10240,16,0,0.3025866746902466
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,10240,32,0,0.18449066082636514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,10240,64,0,0.16366400321324667
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,10240,1,0,2.970282554626465
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,10240,2,0,1.5508480072021484
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,10240,4,0,0.8441119988759359
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,10240,16,0,0.2995199958483378
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,10240,8,0,0.4848639965057373
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,10240,32,0,0.18175466855367026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,10240,64,0,0.16264533003171286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,10240,1,0,3.0411094029744468
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,12288,4,0,1.185109297434489
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,12288,2,0,2.1473280588785806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,12288,8,0,0.6555360158284506
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,12288,16,0,0.39133866628011066
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,12288,32,0,0.24780799945195517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,12288,64,0,0.2053119937578837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,4096,64,0,0.07065066695213318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,12288,1,0,4.112042744954427
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,12288,8,0,0.6418773333231608
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,12288,2,0,2.1654186248779297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,12288,4,0,1.1729973157246907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,12288,16,0,0.38792534669240314
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,12288,32,0,0.24883200724919638
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,12288,64,0,0.20360533396402994
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,12288,1,0,4.202837308247884
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,16384,4,0,1.9000320434570312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,16384,16,0,0.6068906784057617
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,16384,8,0,1.0610346794128418
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,16384,32,0,0.38417065143585205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,2,16384,64,0,0.27698665857315063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,16384,2,0,3.6756534576416016
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,16384,4,0,1.8891199429829915
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,16384,1,0,8.022879918416342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,16384,8,0,1.0222933292388916
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,16384,2,0,3.671210606892904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,16384,32,0,0.38356268405914307
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,16,1,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,16384,16,0,0.6004053354263306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,2,16384,64,0,0.27767467498779297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,16,2,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,16,4,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,16,8,0,0.014949332922697067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,16,16,0,0.016469333320856094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,16,32,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,16,64,0,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,16,1,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,16,2,0,0.01826133330663045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,16,4,0,0.015295999745527903
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,16,16,0,0.016469333320856094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,16,8,0,0.016629333297411602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,16,32,0,0.017642666896184284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,16,64,0,0.01525866612792015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,32,1,0,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,32,2,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,32,4,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,32,8,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,32,16,0,0.01544533297419548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,32,32,0,0.016783999900023144
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,32,64,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,32,1,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,32,2,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,32,4,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,32,8,0,0.014853333433469137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,32,16,0,0.016629333297411602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,32,32,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,32,64,0,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,64,1,0,0.02436800052722295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,64,2,0,0.01858666663368543
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,64,4,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,64,16,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,64,8,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,16384,1,0,8.151557286580404
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,64,32,0,0.01664000004529953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,64,64,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,64,1,0,0.024133334557215374
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,64,2,0,0.01836266616980235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,64,8,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,64,4,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,64,16,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,64,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,64,32,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,128,1,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,128,2,0,0.018853332847356796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,128,4,0,0.018874666343132656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,128,8,0,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,128,16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,128,32,0,0.016970666746298473
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,128,64,0,0.016943999876578648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,128,1,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,128,2,0,0.01868266612291336
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,128,8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,128,4,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,128,16,0,0.01791999985774358
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,128,64,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,128,32,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,256,1,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,256,4,0,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,256,8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,256,2,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,256,16,0,0.02090666691462199
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,256,32,0,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,256,64,0,0.020703999946514767
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,256,1,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,256,4,0,0.022826666633288067
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,256,2,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,256,8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,256,32,0,0.020784000555674236
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,256,16,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,256,64,0,0.02090666691462199
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,512,4,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,512,8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,512,2,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,512,1,0,0.07986666758855183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,512,16,0,0.024847999215126038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,512,64,0,0.02481066683928172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,512,8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,512,4,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,512,2,0,0.04574933151404063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,512,16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,512,1,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,512,32,0,0.02481066683928172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,1024,8,0,0.035487999518712364
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,1024,4,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,1024,2,0,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,1024,32,0,0.0317546675602595
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,1024,16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,1024,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,1024,1,0,0.17203199863433838
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,1024,8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,1024,4,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,1024,2,0,0.09728533029556274
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,1024,16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,1024,32,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,1024,1,0,0.16844799121220908
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,1536,4,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,1536,2,0,0.15957333644231161
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,1536,8,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,1536,16,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,1536,32,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,1536,1,0,0.28996266921361286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,1536,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,1536,4,0,0.0962559978167216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,1536,2,0,0.16008533040682474
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,1536,8,0,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,1536,16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,1536,32,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,1536,64,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,1536,1,0,0.2834773262341817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,2048,4,0,0.13226667046546936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,2048,2,0,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,2048,8,0,0.08362666765848796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,2048,16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,2048,32,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,2048,64,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,2048,1,0,0.4278613328933716
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,2048,4,0,0.13397333025932312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,2048,2,0,0.23244800170262656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,2048,8,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,2048,16,0,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,2048,32,0,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,2048,64,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,2048,1,0,0.41949331760406494
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,3072,4,0,0.22971733411153158
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,3072,2,0,0.41574398676554364
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,3072,8,0,0.13875200351079306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,3072,32,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,3072,16,0,0.09114133318265279
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,3072,64,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,3072,1,0,0.7782399654388428
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,3072,4,0,0.2345013419787089
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,3072,2,0,0.4082346757253011
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,3072,8,0,0.14012266198794046
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,3072,16,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,3072,64,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,3072,32,0,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,3072,1,0,0.7744853496551514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,4096,4,0,0.35652267932891846
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,4096,2,0,0.6545066833496094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,4096,8,0,0.20940800507863364
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,4096,32,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,4096,16,0,0.13550933202107748
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,4096,64,0,0.07611200213432312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,4096,1,0,1.213098684946696
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,4096,2,0,0.6370986700057983
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,4096,4,0,0.3524266481399536
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,4096,16,0,0.13414399822553
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,4096,8,0,0.2097546656926473
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,4096,32,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,4096,64,0,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,4096,1,0,1.2448480129241943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,6144,4,0,0.6789120038350424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,6144,2,0,1.2654933134714763
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,6144,8,0,0.3824640115102132
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,6144,16,0,0.23040533065795898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,6144,32,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,6144,64,0,0.11878400047620137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,6144,1,0,2.345813274383545
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,6144,2,0,1.224021355311076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,6144,4,0,0.6669653256734213
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,6144,16,0,0.22801067431767783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,6144,8,0,0.38075733184814453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,6144,64,0,0.11742400129636128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,6144,32,0,0.1460906664530436
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,6144,1,0,2.47927459081014
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,1024,64,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,8192,4,0,1.1023306846618652
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,8192,16,0,0.3524320125579834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,8192,8,0,0.5959626833597819
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,8192,2,0,1.9923680623372395
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,4,8192,64,0,0.15718932946523032
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,8192,32,0,0.22698666652043661
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,8192,4,0,1.1036960283915203
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,8192,1,0,3.9282347361246743
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,8192,8,0,0.6007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,8192,16,0,0.34935466448465985
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,8192,2,0,2.057216008504232
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,8192,32,0,0.22732800245285034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,16,1,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,4,8192,64,0,0.1585493286450704
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,16,2,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,16,4,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,16,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,16,16,0,0.01657066618402799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,16,32,0,0.01563199982047081
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,16,64,0,0.016789333273967106
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,16,1,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,16,2,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,16,4,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,16,8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,16,32,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,16,16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,16,64,0,0.015301333119471868
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,32,1,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,32,2,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,32,4,0,0.016645333419243496
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,32,8,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,32,16,0,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,32,32,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,32,64,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,32,1,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,32,2,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,32,4,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,32,8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,32,16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,32,32,0,0.016634666671355564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,32,64,0,0.016447999825080235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,64,2,0,0.02481066683928172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,64,1,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,64,4,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,64,8,0,0.01764800027012825
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,8192,1,0,3.9069013595581055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,64,16,0,0.016538667182127636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,64,32,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,64,64,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,64,1,0,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,64,2,0,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,64,4,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,64,8,0,0.018325333793958027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,64,16,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,64,32,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,64,64,0,0.016490666816631954
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,128,1,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,128,4,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,128,8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,128,2,0,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,128,16,0,0.018863999595244724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,128,32,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,128,64,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,128,2,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,128,8,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,128,4,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,128,1,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,128,16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,128,32,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,128,64,0,0.016778666526079178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,256,4,0,0.025199999411900837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,256,2,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,256,8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,256,1,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,256,16,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,256,64,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,256,4,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,256,8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,256,2,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,256,1,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,256,16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,256,32,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,256,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,512,8,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,512,4,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,512,2,0,0.08191466828187306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,512,16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,512,64,0,0.02480533222357432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,512,1,0,0.13926399747530618
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,512,4,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,512,8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,512,2,0,0.08054933448632558
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,512,16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,512,32,0,0.024901332954565685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,512,1,0,0.1389226714769999
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,1024,8,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,1024,4,0,0.10035733381907146
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,1024,16,0,0.038245332737763725
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,1024,32,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,1024,2,0,0.17169066270192465
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,1024,64,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,1024,1,0,0.3199999928474426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,1024,4,0,0.09967999656995137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,1024,2,0,0.1711840033531189
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,1024,8,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,1024,16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,1024,32,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,1024,64,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,1024,1,0,0.3199999928474426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,1536,4,0,0.16366933782895407
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,1536,8,0,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,1536,16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,1536,32,0,0.0433599998553594
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,1536,2,0,0.2868853410085042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,1536,64,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,1536,4,0,0.1634986698627472
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,1536,1,0,0.5396373271942139
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,1536,2,0,0.2872320016225179
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,1536,16,0,0.06483733157316844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,1536,8,0,0.0993386705716451
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,1536,32,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,1536,64,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,1536,1,0,0.5420373280843099
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,2048,4,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,2048,8,0,0.13875200351079306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,2048,16,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,2048,32,0,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,2048,2,0,0.42239999771118164
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,2048,64,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,2048,4,0,0.23517866929372153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,2048,1,0,0.8060533205668131
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,2048,16,0,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,2048,8,0,0.13755200306574503
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,2048,2,0,0.4217173258463542
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,2048,32,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,2048,64,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,2048,1,0,0.812544027964274
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,3072,4,0,0.41915734608968097
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,3072,16,0,0.14404267072677612
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,3072,8,0,0.2402986685434977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,3072,64,0,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,3072,32,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,3072,2,0,0.7789226373036703
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,3072,4,0,0.4213706652323405
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,3072,1,0,1.478314717610677
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,3072,8,0,0.2392746607462565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,3072,2,0,0.7758399645487467
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,3072,16,0,0.14506666858990988
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,3072,32,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,3072,64,0,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,4096,4,0,0.6541706720987955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,3072,1,0,1.4649972915649414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,4096,16,0,0.21775466203689575
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,4096,8,0,0.3619786500930786
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,4096,32,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,4096,2,0,1.2202666600545247
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,8,4096,64,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,4096,4,0,0.6555253267288208
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,4096,1,0,2.305023988087972
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,4096,8,0,0.36369065443674725
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,4096,2,0,1.240063985188802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,4096,32,0,0.14114133516947427
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,4096,16,0,0.21400533119837442
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,8,4096,64,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,16,1,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,16,2,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,16,4,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,16,8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,16,32,0,0.01647466669480006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,16,16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,16,16,64,0,0.015872000406185787
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,16,1,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,16,2,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,16,8,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,16,4,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,16,16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,16,32,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,16,16,64,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,32,1,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,32,2,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,32,4,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,32,8,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,32,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,32,32,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,16,32,64,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,32,1,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,32,2,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,32,4,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,32,8,0,0.01834133391578992
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,32,16,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,32,32,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,16,32,64,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,64,2,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,64,4,0,0.02481066683928172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,64,1,0,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,4096,1,0,2.3649279276529946
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,64,16,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,64,8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,64,32,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,16,64,64,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,64,4,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,64,2,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,64,1,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,64,8,0,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,64,16,0,0.01699200024207433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,64,32,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,16,64,64,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,128,8,0,0.020714666694402695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,128,4,0,0.027653334041436512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,128,2,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,128,1,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,128,16,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,128,32,0,0.018650667121013004
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,16,128,64,0,0.0169813334941864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,128,4,0,0.027642667293548584
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,128,8,0,0.01903466631968816
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,128,2,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,128,1,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,16,128,64,0,0.018538666268189747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,128,16,0,0.019018666197856266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,128,32,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,256,8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,256,4,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,256,2,0,0.06962666908899943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,256,16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,256,32,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,16,256,64,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,256,1,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,256,4,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,256,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,256,2,0,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,256,16,0,0.023546665906906128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,256,32,0,0.022805333137512207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,16,256,64,0,0.02275199939807256
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,256,1,0,0.11878400047620137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,512,8,0,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,512,4,0,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,512,16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,512,32,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,512,2,0,0.14284800489743552
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,16,512,64,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,512,1,0,0.25975465774536133
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,512,4,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,512,2,0,0.14250666896502176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,512,16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,512,8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,512,32,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,16,512,64,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,512,1,0,0.2600906689961751
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,1024,8,0,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,1024,4,0,0.1776640017827352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,1024,32,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,1024,16,0,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,1024,2,0,0.3237546682357788
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,16,1024,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,1024,4,0,0.17767467101415
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,1024,1,0,0.6171466509501139
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,1024,8,0,0.10376532872517903
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,1024,16,0,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,1024,2,0,0.32443734010060626
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,1024,32,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,16,1024,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,1024,1,0,0.6099626620610555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,1536,4,0,0.29815467198689777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,1536,8,0,0.16741865873336792
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,1536,16,0,0.10546132922172546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,1536,32,0,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,16,1536,64,0,0.05161066850026449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,1536,2,0,0.5464746554692587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,1536,4,0,0.29678932825724286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,1536,1,0,1.0463573137919109
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,1536,8,0,0.16946667432785034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,1536,2,0,0.5461333195368449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,1536,16,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,16,1536,64,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,1536,32,0,0.07133333384990692
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,1536,1,0,1.0350879828135173
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,2048,4,0,0.43537068367004395
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,2048,16,0,0.14523200194040933
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,2048,8,0,0.24132267634073892
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,2048,32,0,0.09686932961146037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,2048,2,0,0.8060586452484131
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,16,2048,64,0,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,2048,4,0,0.42922667662302655
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,2048,1,0,1.5802027384440105
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,2048,8,0,0.24200532833735147
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,2048,2,0,0.813221295674642
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,2048,32,0,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,2048,16,0,0.145578662554423
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,16,2048,64,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,16,1,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,16,2,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,16,4,0,0.030031998952229817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,16,8,0,0.020928000410397846
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,16,16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,16,32,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,32,16,64,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,16,2,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,16,4,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,16,1,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,16,8,0,0.02274133265018463
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,16,16,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,16,32,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,32,16,64,0,0.016469333320856094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,32,2,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,32,4,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,32,1,0,0.07747733096281688
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,32,8,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,32,16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,32,32,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,32,32,64,0,0.01651200031240781
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,32,2,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,32,1,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,32,4,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,32,8,0,0.02325333406527837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,32,16,0,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,32,32,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,32,32,64,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,2048,1,0,1.5588693618774414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,64,2,0,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,64,8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,64,4,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,64,1,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,64,16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,32,64,64,0,0.016682667036851246
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,64,32,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,64,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,64,4,0,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,64,1,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,64,2,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,64,16,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,64,32,0,0.016943999876578648
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,32,64,64,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,128,8,0,0.028325334191322327
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,128,4,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,128,2,0,0.06723733246326447
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,128,16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,128,32,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,128,1,0,0.11400533715883891
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,128,4,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,128,2,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,128,16,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,128,8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,128,1,0,0.11468799908955891
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,128,32,0,0.01899733394384384
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,32,128,64,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,256,4,0,0.07202666501204173
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,256,2,0,0.12185600399971008
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,256,8,0,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,256,16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,256,32,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,256,1,0,0.22392533222834268
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,32,256,64,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,256,4,0,0.07168533404668172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,256,2,0,0.12185600399971008
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,256,8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,256,16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,256,32,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,32,256,64,0,0.023103999594847362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,256,1,0,0.22016000747680664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,512,4,0,0.14642666776974997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,512,8,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,512,2,0,0.2681173284848531
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,512,32,0,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,512,16,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,32,512,64,0,0.030373332401116688
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,512,1,0,0.5029546817143759
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,512,4,0,0.14574933052062988
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,512,2,0,0.26710400978724164
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,512,16,0,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,512,8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,512,32,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,32,512,64,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,512,1,0,0.5036373138427734
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,1024,4,0,0.3326293428738912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,1024,8,0,0.1884160041809082
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,1024,2,0,0.6178186734517416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,1024,32,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,1024,16,0,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,32,1024,64,0,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,1024,1,0,1.195690631866455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,1024,2,0,0.6164426803588867
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,1024,4,0,0.3322880069414775
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,1024,16,0,0.11513599753379822
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,1024,8,0,0.18551466862360635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,32,1024,64,0,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,1024,32,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,16,2,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,16,1,0,0.12732266386349997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,16,4,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,16,8,0,0.02924266705910365
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,16,16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,16,32,0,0.017130666722853977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,64,16,64,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,1024,1,0,1.1997919877370198
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,16,1,0,0.1262933313846588
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,16,2,0,0.06996800005435944
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,16,8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,16,4,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,16,16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,16,32,0,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,64,16,64,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,32,4,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,32,1,0,0.13550399740537009
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,32,2,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,32,8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,32,16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,32,32,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,64,32,64,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,32,2,0,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,32,4,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,32,1,0,0.13636799653371176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,32,8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,32,16,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,32,32,0,0.018522666146357853
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,64,32,64,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,64,4,0,0.05427733560403188
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,64,2,0,0.09148800373077393
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,64,16,0,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,64,8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,64,1,0,0.16059733430544534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,64,32,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,64,64,64,0,0.018863999595244724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,64,4,0,0.05460800230503082
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,64,2,0,0.09113066395123799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,64,8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,64,16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,64,1,0,0.16025599837303162
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,64,32,0,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,64,64,64,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,128,4,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,128,2,0,0.11673067013422649
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,128,8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,32,128,64,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,128,32,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,128,16,0,0.03071466585000356
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,64,128,64,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,128,1,0,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,128,4,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,128,8,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,128,2,0,0.116047998269399
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,128,16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,128,32,0,0.021104000508785248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,64,128,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,128,1,0,0.21333332856496176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,256,4,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,256,8,0,0.07674133280913036
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,256,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,256,16,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,256,2,0,0.22494399547576904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,64,256,64,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,256,4,0,0.1276639997959137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,256,1,0,0.42717333634694415
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,256,8,0,0.07782933115959167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,256,2,0,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,256,16,0,0.05086400111516317
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,64,256,64,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,256,32,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,256,1,0,0.42138131459554035
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,512,4,0,0.2752853234608968
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,512,8,0,0.15581867098808289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,512,16,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,512,32,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,64,512,64,0,0.04643199841181437
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,512,2,0,0.5128533442815145
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,512,4,0,0.27460267146428424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,512,1,0,0.98798934618632
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,512,2,0,0.5131946802139282
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,512,8,0,0.15581867098808289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,64,512,64,0,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,512,32,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,512,16,0,0.09556800127029419
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,128,16,2,0,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,128,16,4,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,128,16,1,0,0.23483733336130777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,128,16,8,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,128,16,16,0,0.030373332401116688
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,128,16,32,0,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,128,16,64,0,0.018725333114465077
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,128,16,4,0,0.07168533404668172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,128,16,2,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,128,16,1,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,128,16,8,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,128,16,16,0,0.029701332251230877
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,128,16,64,0,0.018650667121013004
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,128,16,32,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,128,32,4,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,128,32,2,0,0.13738666971524557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,512,1,0,0.9886720180511475
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,128,32,8,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,128,32,1,0,0.2529279987017314
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,128,32,16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,128,32,32,0,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,128,32,64,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,128,32,8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,128,32,4,0,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,128,32,2,0,0.1372160017490387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,128,32,16,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,128,32,32,0,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,128,32,64,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,128,32,1,0,0.2525866627693176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,128,64,4,0,0.09386666615804036
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,128,64,8,0,0.056661332647005715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,128,64,32,0,0.02628266563018163
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,128,64,16,0,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,128,64,2,0,0.1633333365122477
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,128,64,64,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,128,64,1,0,0.30565865834554035
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,128,64,4,0,0.09250666697820027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,128,64,2,0,0.16264533003171286
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,128,64,16,0,0.03710933278004328
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,128,64,8,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,128,64,32,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,128,64,64,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,128,64,1,0,0.30771734317143756
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,128,128,8,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,128,128,4,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,128,128,16,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,128,128,32,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,128,128,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,128,128,2,0,0.21708800395329794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,128,128,4,0,0.12185600399971008
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,128,128,1,0,0.4102826515833537
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,128,128,8,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,128,128,2,0,0.2187946637471517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,128,128,16,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,128,128,32,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,128,128,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,128,128,1,0,0.4092586835225423
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,128,256,4,0,0.23859200874964395
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,128,256,8,0,0.13499733805656433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,128,256,16,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,128,256,32,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,128,256,64,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,128,256,2,0,0.43536531925201416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,128,256,4,0,0.237226665019989
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,128,256,1,0,0.8313173453013102
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,128,256,2,0,0.4350293477376302
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,128,256,8,0,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,128,256,64,0,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,128,256,32,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,128,256,16,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,256,16,4,0,0.1293706695238749
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,256,16,2,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,256,16,16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,256,16,8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,256,16,1,0,0.4490186770757039
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,256,16,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,256,16,64,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,256,16,4,0,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,256,16,2,0,0.23619200785954794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,256,16,8,0,0.07201600074768066
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,256,16,1,0,0.4486986796061198
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,256,16,16,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,256,16,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,256,16,64,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,128,256,1,0,0.8255146344502767
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,256,32,8,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,256,32,4,0,0.13756266236305237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,256,32,16,0,0.04941866795221964
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,256,32,32,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,256,32,2,0,0.25463465849558514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,256,32,64,0,0.024853333830833435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,256,32,1,0,0.4930560191472371
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,256,32,4,0,0.13824533422787985
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,256,32,8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,256,32,2,0,0.2553120056788127
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,256,32,16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,256,32,32,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,256,32,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,256,32,1,0,0.49033065636952716
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,256,64,4,0,0.16810667514801025
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,256,64,8,0,0.09899200002352397
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,256,64,32,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,256,64,16,0,0.061434666315714516
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,256,64,2,0,0.3135146697362264
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,256,64,64,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,256,64,4,0,0.16810667514801025
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,256,64,1,0,0.5990399916966757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,256,64,8,0,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,256,64,2,0,0.31249066193898517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,256,64,16,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,256,64,64,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,256,64,32,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,256,64,1,0,0.5976746479670206
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,256,128,4,0,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,256,128,8,0,0.13141333063443503
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,256,128,16,0,0.08362666765848796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,256,128,32,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,1,256,128,64,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,256,128,2,0,0.41847999890645343
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,256,128,4,0,0.22835199038187662
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,256,128,1,0,0.8029867013295492
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,256,128,8,0,0.13209600249926248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,256,128,2,0,0.41606934865315753
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,256,128,32,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,256,128,16,0,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,1,256,128,64,0,0.045050665736198425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,16,1,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,16,2,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,16,4,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,16,8,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,16,16,0,0.015957333147525787
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,16,32,0,0.014912000546852747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,16,64,0,0.01727466657757759
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,16,2,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,16,4,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,16,1,0,0.01682666689157486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,16,8,0,0.016789333273967106
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,16,16,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,16,32,0,0.014848000059525171
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,16,64,0,0.014746667196353277
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,32,1,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,32,2,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,32,4,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,32,8,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,32,16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,32,32,0,0.015029333531856537
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,32,64,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,32,1,0,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,32,4,0,0.016597333053747814
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,32,16,0,0.01664000004529953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,32,8,0,0.016506666938463848
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,32,32,0,0.016688000410795212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,32,64,0,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,256,128,1,0,0.8026453653971354
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,64,1,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,64,2,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,64,4,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,64,8,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,64,16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,64,32,0,0.014954666296641031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,64,64,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,64,1,0,0.0173333336909612
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,64,2,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,64,4,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,64,8,0,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,64,16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,64,32,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,64,64,0,0.015637333194414776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,128,1,0,0.01904533306757609
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,128,2,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,128,4,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,128,16,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,128,8,0,0.016778666526079178
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,128,32,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,128,64,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,128,1,0,0.019050666441520054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,128,4,0,0.018687999496857326
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,128,2,0,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,128,8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,128,16,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,128,32,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,128,64,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,256,2,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,256,1,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,256,4,0,0.020768000433842342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,256,16,0,0.020714666694402695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,256,8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,256,32,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,256,64,0,0.020714666694402695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,256,1,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,256,2,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,256,4,0,0.021061333517233532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,256,8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,256,16,0,0.020762667059898376
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,256,32,0,0.020703999946514767
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,256,64,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,512,2,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,512,1,0,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,512,4,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,512,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,512,16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,512,32,0,0.02422933280467987
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,512,4,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,512,2,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,512,1,0,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,512,8,0,0.02480533222357432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,512,16,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,512,32,0,0.023823998868465424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,512,64,0,0.02481066683928172
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,1024,4,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,1024,2,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,1024,1,0,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,1024,8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,1024,16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,1024,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,1024,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,1024,4,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,1024,2,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,1024,1,0,0.0962559978167216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,1024,8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,1024,16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,1024,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,1024,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,1536,4,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,1536,2,0,0.09454400340716045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,1536,8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,1536,16,0,0.03857066730658213
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,1536,1,0,0.15820800264676413
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,1536,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,1536,64,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,1536,4,0,0.058373332023620605
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,1536,2,0,0.09591999650001526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,1536,1,0,0.1585493286450704
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,1536,8,0,0.03994133323431015
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,1536,16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,1536,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,1536,64,0,0.0365226666132609
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,2048,4,0,0.08054933448632558
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,2048,2,0,0.13226667046546936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,2048,8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,2048,1,0,0.22937599817911783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,2048,16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,2048,64,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,2048,32,0,0.043338666359583534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,2048,4,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,2048,2,0,0.13329066832860312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,2048,8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,2048,1,0,0.22904000679651895
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,2048,16,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,2048,32,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,2048,64,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,3072,4,0,0.13704533378283182
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,3072,2,0,0.22766399383544922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,3072,8,0,0.0846560001373291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,3072,16,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,3072,32,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,3072,1,0,0.4106239875157674
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,3072,64,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,3072,4,0,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,3072,2,0,0.22937599817911783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,3072,8,0,0.08363200227419536
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,3072,16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,3072,64,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,3072,32,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,3072,1,0,0.4126720031102498
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,4096,4,0,0.20599466562271118
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,4096,2,0,0.34969600041707355
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,4096,8,0,0.12902399897575378
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,4096,16,0,0.07578666508197784
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,4096,32,0,0.07100266714890797
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,4096,64,0,0.0699786643187205
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,4096,1,0,0.6432373523712158
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,4096,4,0,0.2058239976565043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,4096,2,0,0.34907201925913495
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,4096,8,0,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,4096,16,0,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,4096,1,0,0.6480160156885783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,4096,32,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,4096,64,0,0.06996800005435944
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,6144,4,0,0.3694933255513509
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,6144,2,0,0.667306661605835
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,6144,8,0,0.22459733486175537
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,6144,16,0,0.1367039978504181
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,6144,32,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,6144,64,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,6144,1,0,1.2661813100179036
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,6144,4,0,0.3742719888687134
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,6144,2,0,0.6679893334706625
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,6144,16,0,0.1372160017490387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,6144,8,0,0.2259626587231954
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,6144,32,0,0.09865066409111023
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,6144,64,0,0.09665066997210185
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,6144,1,0,1.2547413508097331
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,8192,4,0,0.5870933135350546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,8192,2,0,1.0938133398691814
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,8192,8,0,0.3397973378499349
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,8192,32,0,0.12868266304334006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,8192,16,0,0.21845332781473795
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,8192,64,0,0.12321600317955017
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,8192,1,0,2.0145492553710938
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,8192,4,0,0.5908480087916056
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,8192,2,0,1.0443092981974285
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,8192,16,0,0.2153759996096293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,8192,8,0,0.3428693215052287
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,32,2,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,8192,32,0,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,8192,64,0,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,8192,1,0,2.028554598490397
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,10240,4,0,0.8407039642333984
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,10240,8,0,0.48520533243815106
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,10240,32,0,0.1730560064315796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,10240,16,0,0.30088533957799274
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,10240,64,0,0.15187733372052512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,10240,2,0,1.5569920539855957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,10240,4,0,0.8526506423950195
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,10240,2,0,1.6174079577128093
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,10240,8,0,0.47939733664194745
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,10240,1,0,2.9644692738850913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,10240,64,0,0.14865066607793173
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,10240,32,0,0.17339734236399332
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,10240,16,0,0.30190932750701904
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,10240,1,0,2.9991254806518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,12288,4,0,1.172650655110677
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,12288,16,0,0.39202133814493817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,12288,8,0,0.6466453472773234
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,12288,64,0,0.17834667364756265
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,12288,32,0,0.24167466163635254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,12288,2,0,2.2207253774007163
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,12288,4,0,1.1381759643554688
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,12288,8,0,0.6446026563644409
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,12288,1,0,4.3596852620442705
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,12288,2,0,2.179413318634033
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,12288,32,0,0.23961599667867026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,12288,64,0,0.1786880095799764
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,12288,16,0,0.38758401075998944
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,1,16384,4,0,1.8867146174112956
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,12288,1,0,4.404911994934082
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,1,16384,16,0,0.5939199924468994
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,1,16384,8,0,1.0228053728739421
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,1,16384,32,0,0.3800906737645467
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,1,16384,64,0,0.25804799795150757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,1,16384,2,0,3.7224159240722656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,1,16384,4,0,1.9065173467000325
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,1,16384,8,0,1.0620640118916829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,1,16384,1,0,7.222117106119792
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,1,16384,2,0,3.625306765238444
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,1,16384,32,0,0.3739306529362996
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,1,16384,16,0,0.6065546671549479
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,1,16384,64,0,0.25565866629282635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,16,1,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,16,2,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,16,4,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,16,8,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,16,16,0,0.01563199982047081
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,16,32,0,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,16,64,0,0.014762666076421738
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,16,1,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,16,2,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,16,4,0,0.016688000410795212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,16,8,0,0.015872000406185787
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,16,16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,16,64,0,0.014618666221698126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,32,1,0,0.02110933264096578
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,32,2,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,32,4,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,32,8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,32,16,0,0.016149333367745083
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,32,32,0,0.01632000009218852
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,32,64,0,0.014783999572197596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,32,1,0,0.021776000658671062
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,32,2,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,32,4,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,32,8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,32,16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,1,16384,1,0,7.972357432047526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,32,32,0,0.015130666395028433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,32,64,0,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,64,1,0,0.023221333821614582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,64,2,0,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,64,4,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,64,8,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,64,16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,64,32,0,0.016458666572968166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,64,64,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,64,1,0,0.02347733328739802
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,64,2,0,0.018021332720915478
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,64,4,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,64,8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,64,32,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,64,16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,64,64,0,0.014789332946141561
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,128,2,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,128,1,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,128,4,0,0.01825599993268649
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,128,16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,128,8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,128,32,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,128,64,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,128,1,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,128,4,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,128,2,0,0.018837332725524902
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,128,8,0,0.016890666137139004
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,128,16,0,0.01711999997496605
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,128,32,0,0.016906666258970898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,128,64,0,0.016927999754746754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,256,2,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,256,1,0,0.039605334401130676
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,256,4,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,256,16,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,256,8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,256,32,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,256,64,0,0.020714666694402695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,256,2,0,0.024847999215126038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,256,1,0,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,256,4,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,256,8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,256,32,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,256,16,0,0.020762667059898376
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,256,64,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,512,2,0,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,512,4,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,512,1,0,0.0798773318529129
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,512,8,0,0.02553066611289978
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,512,16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,512,64,0,0.023904000719388325
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,512,2,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,512,4,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,512,1,0,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,512,8,0,0.02685333291689555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,512,16,0,0.024853333830833435
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,512,32,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,512,64,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,1024,4,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,1024,2,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,1024,8,0,0.03584533433119456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,1024,16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,1024,1,0,0.1687893271446228
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,1024,32,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,1024,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,1024,4,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,1024,2,0,0.09904000163078308
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,1024,16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,1024,8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,1024,1,0,0.1694773236910502
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,1024,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,1024,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,1536,4,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,1536,2,0,0.15991999705632529
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,1536,8,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,1536,16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,1536,32,0,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,1536,1,0,0.2855253418286641
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,1536,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,1536,4,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,1536,2,0,0.15786133209864298
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,1536,8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,1536,32,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,1536,16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,1536,64,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,1536,1,0,0.28654932975769043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,2048,4,0,0.1327786644299825
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,2048,2,0,0.23176000515619913
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,2048,8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,2048,16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,2048,32,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,2048,64,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,2048,1,0,0.42018131415049237
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,2048,4,0,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,2048,2,0,0.2310826579729716
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,2048,16,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,2048,8,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,2048,32,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,2048,64,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,2048,1,0,0.4230719804763794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,3072,4,0,0.2307413419087728
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,3072,8,0,0.13857600092887878
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,3072,2,0,0.41198933124542236
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,3072,32,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,3072,16,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,3072,64,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,3072,1,0,0.7703893184661865
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,3072,4,0,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,3072,2,0,0.40960001945495605
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,3072,16,0,0.08533333738644917
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,3072,8,0,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,3072,32,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,3072,64,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,3072,1,0,0.7758506933848063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,4096,4,0,0.3561866680781047
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,4096,2,0,0.6459733247756958
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,4096,8,0,0.2070186734199524
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,4096,32,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,4096,16,0,0.13329066832860312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,4096,64,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,4096,1,0,1.2045706907908122
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,4096,2,0,0.6500693162282308
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,4096,4,0,0.3513973156611125
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,4096,16,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,4096,8,0,0.20804266134897867
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,4096,32,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,4096,64,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,4096,1,0,1.2277759710947673
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,16,32,0,0.01647466669480006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,6144,4,0,0.6693546772003174
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,6144,16,0,0.22733332713445029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,6144,8,0,0.36983466148376465
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,6144,2,0,1.270954688390096
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,6144,32,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,6144,64,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,6144,4,0,0.6608266830444336
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,6144,1,0,2.370394706726074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,6144,8,0,0.3701759974161784
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,6144,2,0,1.257813294728597
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,6144,16,0,0.22563199202219644
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,6144,32,0,0.14012266198794046
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,6144,64,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,8192,4,0,1.0876586437225342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,6144,1,0,2.3956480026245117
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,8192,16,0,0.34594134489695233
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,8192,8,0,0.5836746692657471
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,8192,32,0,0.21947733561197916
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,8192,64,0,0.14421332875887552
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,8192,2,0,1.9882666269938152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,8192,4,0,1.0364533265431721
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,8192,8,0,0.583679993947347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,8192,1,0,4.092368125915527
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,8192,2,0,2.0186452865600586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,8192,32,0,0.21845332781473795
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,8192,16,0,0.3394560019175212
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,8192,64,0,0.1437013347943624
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,10240,4,0,1.5822505950927734
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,8192,1,0,4.03165340423584
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,10240,16,0,0.4886186520258586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,10240,8,0,0.8567466735839844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,10240,64,0,0.1848319967587789
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,10240,32,0,0.30395734310150146
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,10240,2,0,2.977114677429199
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,10240,4,0,1.5295146306355794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,10240,8,0,0.8458240032196045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,10240,2,0,3.043498675028483
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,10240,1,0,6.43174425760905
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,10240,16,0,0.4800800085067749
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,10240,32,0,0.3036160071690877
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,10240,64,0,0.18473599354426065
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,12288,4,0,2.1500585873921714
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,10240,1,0,6.428682963053386
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,12288,16,0,0.6517759958902994
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,12288,8,0,1.174015998840332
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,12288,64,0,0.24644267559051514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,12288,32,0,0.3930453459421794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,12288,2,0,4.418394724527995
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,12288,4,0,2.173269271850586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,12288,8,0,1.1589600245157878
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,12288,2,0,4.486661275227864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,12288,1,0,9.143632253011068
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,12288,64,0,0.24440000454584757
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,12288,16,0,0.6541653474171957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,12288,32,0,0.39133866628011066
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,2,16384,4,0,3.6360534032185874
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,12288,1,0,9.383087793986002
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,2,16384,16,0,1.0576266447703044
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,2,16384,8,0,1.8952479362487793
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,2,16384,32,0,0.6068906784057617
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,2,16384,64,0,0.3845119873682658
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,2,16384,2,0,7.55781364440918
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,2,16384,4,0,3.6886186599731445
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,2,16384,8,0,1.9375732739766438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,2,16384,2,0,7.94540278116862
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,2,16384,1,0,16.518661499023438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,2,16384,16,0,1.067519982655843
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,2,16384,32,0,0.609279990196228
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,16,1,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,2,16384,64,0,0.37802668412526447
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,16,2,0,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,16,4,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,16,16,0,0.01648533344268799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,16,8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,16,32,0,0.015989333391189575
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,16,64,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,16,1,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,16,4,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,16,2,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,16,8,0,0.016837333639462788
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,16,16,0,0.016597333053747814
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,16,32,0,0.015626666446526844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,16,64,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,32,1,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,32,2,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,32,4,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,32,8,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,32,16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,32,32,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,32,64,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,32,1,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,32,2,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,32,4,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,32,8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,32,32,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,32,64,0,0.015882667154073715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,64,1,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,64,2,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,64,4,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,64,8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,64,16,0,0.016629333297411602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,64,32,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,64,64,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,64,2,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,64,1,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,64,4,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,64,8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,64,16,0,0.016634666671355564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,64,32,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,64,64,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,128,1,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,128,2,0,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,128,8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,128,4,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,128,16,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,128,32,0,0.016906666258970898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,128,64,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,128,1,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,128,2,0,0.026922665536403656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,128,8,0,0.018853332847356796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,128,4,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,128,16,0,0.018522666146357853
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,128,32,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,2,16384,1,0,16.62550989786784
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,128,64,0,0.01701333373785019
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,256,4,0,0.02514133354028066
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,256,2,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,256,1,0,0.0675786683956782
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,256,16,0,0.021104000508785248
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,256,32,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,256,8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,256,64,0,0.020762667059898376
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,256,4,0,0.02518933266401291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,256,2,0,0.04029333343108495
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,256,1,0,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,256,8,0,0.022805333137512207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,256,16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,256,32,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,256,64,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,512,4,0,0.04538666705290476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,512,2,0,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,512,8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,512,1,0,0.13943466544151306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,512,16,0,0.026288000245889027
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,512,64,0,0.025146665672461193
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,512,4,0,0.04710933566093445
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,512,2,0,0.07851733267307281
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,512,8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,512,1,0,0.1384106675783793
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,512,16,0,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,512,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,512,64,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,1024,4,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,1024,2,0,0.17169066270192465
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,1024,8,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,1024,16,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,1024,32,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,1024,64,0,0.03277866790692011
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,1024,1,0,0.31590400139490765
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,1024,4,0,0.09694400429725647
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,1024,2,0,0.17152533928553262
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,1024,16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,1024,8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,1024,32,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,1024,64,0,0.031045332551002502
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,1024,1,0,0.3176106611887614
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,1536,4,0,0.15889599919319153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,1536,2,0,0.285866657892863
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,1536,8,0,0.09591999650001526
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,1536,16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,1536,32,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,1536,64,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,1536,1,0,0.5348693529764811
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,1536,2,0,0.2868799964586894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,1536,4,0,0.1609386702378591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,1536,16,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,1536,8,0,0.0962559978167216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,1536,32,0,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,1536,64,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,1536,1,0,0.5379413366317749
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,2048,4,0,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,2048,2,0,0.42342400550842285
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,2048,8,0,0.13397333025932312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,2048,32,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,2048,16,0,0.08431466420491536
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,2048,64,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,2048,1,0,0.799232006072998
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,2048,2,0,0.42683732509613037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,2048,4,0,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,2048,16,0,0.08533333738644917
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,2048,8,0,0.13431466619173685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,2048,32,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,2048,64,0,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,2048,1,0,0.7985440095265707
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,3072,4,0,0.41300801436106366
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,3072,2,0,0.7789226373036703
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,3072,8,0,0.23142399390538534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,3072,32,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,3072,16,0,0.1397813359896342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,3072,64,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,3072,1,0,1.4445226987202961
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,32,16,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,3072,2,0,0.7478613058725992
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,3072,4,0,0.41437868277231854
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,3072,8,0,0.2310826579729716
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,3072,16,0,0.13857600092887878
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,3072,32,0,0.09181867043177287
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,3072,64,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,3072,1,0,1.5039092699686687
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,4096,4,0,0.6391573349634806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,4096,8,0,0.3595946629842122
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,4096,16,0,0.2071839968363444
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,4096,64,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,4096,32,0,0.13550933202107748
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,4096,2,0,1.2318720022837322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,4096,4,0,0.649727980295817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,4096,2,0,1.204906702041626
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,4096,8,0,0.3572053511937459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,4096,1,0,2.3318187395731607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,4096,64,0,0.0846560001373291
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,4096,32,0,0.1358506679534912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,4096,16,0,0.21112000942230225
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,4096,1,0,2.2931200663248696
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,6144,4,0,1.2583253383636475
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,6144,16,0,0.3824746608734131
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,6144,8,0,0.6679893334706625
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,6144,32,0,0.23176532983779907
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,6144,64,0,0.14592533310254416
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,6144,2,0,2.4246613184611
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,6144,4,0,1.2419413725535076
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,6144,8,0,0.6765226523081461
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,6144,2,0,2.3181653022766113
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,6144,1,0,4.571983973185222
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,6144,16,0,0.37905065218607586
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,6144,32,0,0.22869332631429037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,6144,64,0,0.1454080045223236
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,4,8192,4,0,2.2584373156229653
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,6144,1,0,4.688218752543132
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,4,8192,16,0,0.592906673749288
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,4,8192,8,0,1.0753706296284993
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,4,8192,32,0,0.35140268007914227
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,4,8192,64,0,0.22459733486175537
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,4,8192,2,0,3.9406986236572266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,4,8192,4,0,1.990997314453125
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,4,8192,8,0,1.078447977701823
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,4,8192,2,0,4.202837308247884
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,4,8192,1,0,8.873141606648764
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,4,8192,16,0,0.5884586572647095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,4,8192,64,0,0.22732800245285034
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,4,8192,32,0,0.34867199261983234
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,16,1,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,16,2,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,16,4,0,0.02089066555102666
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,16,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,16,16,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,16,32,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,16,64,0,0.016832000265518825
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,16,1,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,16,2,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,16,4,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,16,8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,16,16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,16,32,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,16,64,0,0.016597333053747814
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,32,1,0,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,32,2,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,32,4,0,0.022848000129063923
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,32,32,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,32,16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,32,64,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,32,1,0,0.04506133496761322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,32,2,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,32,4,0,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,32,8,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,32,16,0,0.016629333297411602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,32,32,0,0.016506666938463848
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,32,64,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,64,1,0,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,64,2,0,0.034128000338872276
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,64,4,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,64,8,0,0.018863999595244724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,64,16,0,0.01701333373785019
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,64,32,0,0.016602666427691776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,64,64,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,64,1,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,64,2,0,0.03481066723664602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,64,8,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,64,4,0,0.024506665766239166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,64,16,0,0.017349333812793095
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,64,32,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,64,64,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,128,2,0,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,128,1,0,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,128,4,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,128,8,0,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,128,16,0,0.018853332847356796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,128,32,0,0.018522666146357853
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,4,8192,1,0,8.422751744588217
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,128,64,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,128,4,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,128,2,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,128,1,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,128,8,0,0.020714666694402695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,128,16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,128,32,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,128,64,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,256,4,0,0.04027733455101649
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,256,2,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,256,8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,256,1,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,256,16,0,0.02309333284695943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,256,32,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,256,64,0,0.020768000433842342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,256,4,0,0.03957866628964742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,256,2,0,0.06826133529345195
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,256,16,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,256,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,256,1,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,256,32,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,256,64,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,512,4,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,512,2,0,0.13943466544151306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,512,8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,512,16,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,512,32,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,512,64,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,512,1,0,0.25600000222524005
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,512,4,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,512,2,0,0.14114133516947427
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,512,8,0,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,512,16,0,0.030378667016824085
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,512,32,0,0.02720000098148982
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,512,64,0,0.0258240004380544
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,512,1,0,0.258730669816335
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,1024,4,0,0.17373865842819214
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,1024,8,0,0.10035199920336406
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,1024,2,0,0.3199999928474426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,1024,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,1024,16,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,1024,64,0,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,1024,1,0,0.6041599909464518
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,1024,2,0,0.31965865691502887
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,1024,4,0,0.1716853380203247
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,1024,16,0,0.06348266700903575
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,1024,8,0,0.10001066327095032
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,1024,32,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,1024,64,0,0.0341386670867602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,1024,1,0,0.6079146862030029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,1536,4,0,0.29098665714263916
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,1536,2,0,0.5393066803614298
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,1536,8,0,0.16332266728083292
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,1536,16,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,1536,32,0,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,1536,64,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,1536,1,0,1.02348796526591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,1536,2,0,0.5389653444290161
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,1536,4,0,0.285866657892863
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,1536,8,0,0.16332800189654031
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,1536,16,0,0.09761599699656169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,1536,64,0,0.045050665736198425
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,1536,32,0,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,1536,1,0,1.0344106356302898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,2048,4,0,0.4302560091018677
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,32,8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,2048,2,0,0.8060533205668131
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,2048,8,0,0.237226665019989
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,2048,32,0,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,2048,64,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,2048,1,0,1.5346345901489258
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,2048,2,0,0.7937653064727783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,2048,4,0,0.4230826695760091
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,2048,8,0,0.2379093368848165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,2048,16,0,0.13738666971524557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,2048,32,0,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,2048,64,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,2048,1,0,1.5517013867696126
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,3072,4,0,0.7611839771270752
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,3072,2,0,1.4762667020161946
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,3072,8,0,0.42052265008290607
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,3072,32,0,0.14643200238545737
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,3072,16,0,0.23654399315516153
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,3072,64,0,0.09870933492978413
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,3072,1,0,2.8810240427652993
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,3072,2,0,1.4527146021525066
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,3072,4,0,0.785749355951945
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,3072,8,0,0.4227413336435954
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,3072,16,0,0.24065067370732626
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,3072,32,0,0.14626666903495789
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,3072,64,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,3072,1,0,2.8122453689575195
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,8,4096,4,0,1.1840853691101074
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,2048,16,0,0.13738666971524557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,8,4096,2,0,2.36629327138265
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,8,4096,32,0,0.2177706758181254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,8,4096,8,0,0.646992007891337
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,8,4096,16,0,0.36266668637593585
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,8,4096,64,0,0.14164266983668009
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,8,4096,4,0,1.2393759886423747
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,8,4096,1,0,4.58734925587972
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,8,4096,8,0,0.6604799826939901
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,8,4096,16,0,0.36506132284800213
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,8,4096,32,0,0.21777600049972534
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,8,4096,64,0,0.14250666896502176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,16,16,1,0,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,8,4096,2,0,2.336597283681234
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,16,2,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,16,4,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,16,8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,16,16,0,0.017018667111794155
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,16,32,0,0.016447999825080235
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,16,64,0,0.01647466669480006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,16,16,1,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,16,4,0,0.028938665986061096
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,16,2,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,16,8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,16,16,0,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,16,32,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,16,64,0,0.0164533331990242
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,32,2,0,0.044719999035199486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,32,4,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,16,32,1,0,0.07679466903209686
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,32,8,0,0.02279466638962428
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,32,16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,32,32,0,0.01691199963291486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,32,64,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,32,2,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,16,32,1,0,0.0778186668952306
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,32,4,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,32,16,0,0.018522666146357853
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,32,8,0,0.022698665658632915
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,32,32,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,32,64,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,64,4,0,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,64,2,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,16,64,1,0,0.09079999725023906
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,64,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,64,16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,64,32,0,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,64,64,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,64,4,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,64,2,0,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,16,64,1,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,64,8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,64,16,0,0.01860800012946129
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,64,32,0,0.016906666258970898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,64,64,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,128,4,0,0.04131199916203817
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,128,2,0,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,128,8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,128,16,0,0.01905599981546402
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,16,128,1,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,128,32,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,128,64,0,0.01860800012946129
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,128,4,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,128,2,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,128,8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,128,16,0,0.021312000850836437
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,16,128,1,0,0.11333866914113362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,128,32,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,128,64,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,8,4096,1,0,4.616533279418945
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,256,8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,256,4,0,0.0689386675755183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,256,32,0,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,256,16,0,0.025605333348115284
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,256,2,0,0.11809600392977397
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,256,64,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,16,256,1,0,0.22016000747680664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,256,4,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,256,16,0,0.02628266563018163
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,256,2,0,0.11776533722877502
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,256,8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,256,32,0,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,256,64,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,16,256,1,0,0.21742933988571167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,512,4,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,512,8,0,0.0825973351796468
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,512,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,512,16,0,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,512,64,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,512,2,0,0.26232000192006427
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,512,4,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,16,512,1,0,0.49851731459299725
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,512,8,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,512,2,0,0.2600959936777751
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,512,16,0,0.05120533208052317
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,512,32,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,512,64,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,16,512,1,0,0.49749334653218585
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,1024,4,0,0.3210186759630839
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,1024,8,0,0.17800533771514893
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,1024,16,0,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,1024,32,0,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,1024,64,0,0.04061333338419596
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,1024,2,0,0.6140586535135905
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,1024,4,0,0.32444266478220624
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,1024,8,0,0.1800533334414164
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,16,1024,1,0,1.1909120082855225
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,1024,2,0,0.6099679867426554
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,1024,64,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,1024,32,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,1024,16,0,0.10547199845314026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,16,1024,1,0,1.1854506333669026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,1536,4,0,0.5471573273340861
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,1536,16,0,0.16947199900945029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,1536,8,0,0.2988373239835103
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,1536,32,0,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,1536,64,0,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,1536,2,0,1.0364586512247722
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,1536,4,0,0.5457919836044312
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,1536,8,0,0.29713066418965656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,1536,2,0,1.0449919700622559
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,16,1536,1,0,2.012495994567871
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,1536,32,0,0.10443199674288432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,1536,16,0,0.16913066307703653
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,1536,64,0,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,16,2048,4,0,0.8002613385518392
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,16,1536,1,0,2.012159983317057
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,16,2048,16,0,0.2461013396581014
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,16,2048,8,0,0.42923200130462646
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,16,2048,32,0,0.14711466431617737
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,16,2048,64,0,0.09658666451772054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,16,2048,2,0,1.564677397410075
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,16,2048,4,0,0.811514695485433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,16,2048,8,0,0.4346880118052165
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,16,2048,2,0,1.5261012713114421
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,16,2048,1,0,3.029168128967285
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,16,2048,16,0,0.24337067206700644
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,16,2048,64,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,16,2048,32,0,0.1461013356844584
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,32,16,1,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,16,4,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,16,2,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,16,16,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,16,8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,16,32,0,0.016885332763195038
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,16,64,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,16,2,0,0.06962666908899943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,32,16,1,0,0.12732799847920737
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,16,4,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,16,8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,16,16,0,0.021173333128293354
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,16,32,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,16,64,0,0.016469333320856094
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,32,2,0,0.07647466659545898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,32,4,0,0.04506133496761322
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,32,32,1,0,0.13516799608866373
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,32,16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,32,8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,32,32,0,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,32,64,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,32,2,0,0.07644266883532207
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,32,4,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,32,32,1,0,0.1353386640548706
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,32,16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,32,8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,32,32,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,32,64,0,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,64,4,0,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,64,2,0,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,64,16,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,64,8,0,0.034485332667827606
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,32,64,1,0,0.16247466206550598
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,64,32,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,64,64,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,64,4,0,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,64,2,0,0.08904000123341878
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,64,8,0,0.03412266572316488
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,32,64,1,0,0.16263999541600546
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,64,16,0,0.02515200028816859
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,64,32,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,64,64,0,0.01699200024207433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,16,2048,1,0,3.0103893280029297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,128,8,0,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,128,4,0,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,128,16,0,0.0286613330245018
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,128,32,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,128,2,0,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,128,64,0,0.018863999595244724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,32,128,1,0,0.2100800077120463
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,128,4,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,128,16,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,128,2,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,128,8,0,0.041989331444104515
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,128,32,0,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,128,64,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,32,128,1,0,0.20958399772644043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,256,4,0,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,256,8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,256,32,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,256,16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,256,64,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,256,2,0,0.22254933913548788
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,256,4,0,0.1204906702041626
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,32,256,1,0,0.41472001870473224
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,256,8,0,0.07270933190981548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,256,2,0,0.2228906750679016
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,256,16,0,0.0430026650428772
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,256,32,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,256,64,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,32,256,1,0,0.41779200236002606
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,512,4,0,0.2650453249613444
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,512,8,0,0.14729066689809164
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,512,32,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,512,16,0,0.08644266923268636
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,512,64,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,512,2,0,0.5067093372344971
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,512,4,0,0.2667520046234131
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,512,8,0,0.1474506656328837
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,32,512,1,0,0.9705706437428793
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,512,2,0,0.5063680013020834
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,512,64,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,512,16,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,512,32,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,32,512,1,0,0.9770826498667399
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,32,1024,4,0,0.6157653331756592
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,32,1024,8,0,0.33160533507664997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,32,1024,16,0,0.18602667252222696
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,32,1024,32,0,0.11297600467999776
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,32,1024,64,0,0.07509866853555043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,32,1024,2,0,1.202191988627116
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,32,1024,4,0,0.6201973358790079
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,32,1024,2,0,1.1960426966349285
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,32,1024,8,0,0.33297065893809
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,32,1024,32,0,0.11400533715883891
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,32,1024,1,0,2.3569067319234214
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,32,1024,16,0,0.18619734048843384
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,32,1024,64,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,16,2,0,0.1276639997959137
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,16,4,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,64,16,1,0,0.2344906727472941
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,16,8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,16,16,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,16,32,0,0.022463999688625336
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,16,64,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,16,4,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,16,2,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,64,16,1,0,0.23483733336130777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,16,8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,16,16,0,0.029002666473388672
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,16,32,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,16,64,0,0.017130666722853977
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,32,4,0,0.07714666426181793
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,32,2,0,0.135343998670578
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,32,16,0,0.03207999964555105
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,32,8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,64,32,1,0,0.2529279987017314
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,32,32,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,32,64,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,32,4,0,0.07714133461316426
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,32,2,0,0.13550933202107748
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,32,8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,32,16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,64,32,1,0,0.25224532683690387
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,32,32,0,0.02310933421055476
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,32,64,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,64,4,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,64,8,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,64,2,0,0.15990933775901794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,32,1024,1,0,2.3528107007344565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,64,32,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,64,16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,64,64,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,64,64,1,0,0.31010667483011883
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,64,4,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,64,8,0,0.05427733560403188
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,64,32,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,64,16,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,64,2,0,0.16145066420237222
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,64,64,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,64,64,1,0,0.30873600641886395
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,128,4,0,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,128,8,0,0.06963733335336049
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,128,16,0,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,128,32,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,128,2,0,0.21470399697621664
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,128,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,128,4,0,0.11639466881752014
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,64,128,1,0,0.40140799681345624
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,128,2,0,0.2146986722946167
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,128,8,0,0.06962666908899943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,128,16,0,0.044719999035199486
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,128,32,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,128,64,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,64,128,1,0,0.40174933274586994
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,256,4,0,0.2239146629969279
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,256,8,0,0.12663466731707254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,256,16,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,256,32,0,0.050853331883748375
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,256,64,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,256,2,0,0.4288853406906128
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,256,4,0,0.22425599892934164
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,256,8,0,0.12663466731707254
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,256,2,0,0.4227413336435954
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,64,256,1,0,0.8193706671396891
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,256,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,256,32,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,256,16,0,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,64,256,1,0,0.8186933199564616
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,64,512,4,0,0.5138773520787557
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,64,512,8,0,0.27323732773462933
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,64,512,16,0,0.15718400478363037
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,64,512,32,0,0.096261332432429
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,64,512,64,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,64,512,2,0,0.9910613695780436
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,64,512,4,0,0.5128533442815145
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,64,512,2,0,0.9921387036641439
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,64,512,8,0,0.27460267146428424
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,64,512,32,0,0.0962559978167216
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,64,512,1,0,1.93996795018514
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,64,512,16,0,0.1564959983030955
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,64,512,64,0,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,128,16,2,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,128,16,4,0,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,128,16,8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,128,16,1,0,0.4514133135477702
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,128,16,16,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,128,16,32,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,128,16,64,0,0.02279466638962428
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,128,16,4,0,0.12800000111262003
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,128,16,2,0,0.23483733336130777
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,128,16,8,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,128,16,16,0,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,128,16,1,0,0.4503893454869588
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,128,16,64,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,128,16,32,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,128,32,4,0,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,128,32,2,0,0.2542933424313863
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,128,32,8,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,128,32,16,0,0.04641066491603851
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,128,32,32,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,128,32,64,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,128,32,1,0,0.4896479845046997
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,128,32,4,0,0.1367039978504181
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,128,32,2,0,0.25361067056655884
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,128,32,8,0,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,64,512,1,0,1.9321173032124836
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,128,32,16,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,128,32,32,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,128,32,64,0,0.02422933280467987
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,128,32,1,0,0.4889599879582723
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,128,64,8,0,0.09249599774678548
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,128,64,4,0,0.16265066464742026
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,128,64,16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,128,64,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,128,64,64,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,128,64,2,0,0.3063360055287679
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,128,64,4,0,0.16366933782895407
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,128,64,1,0,0.6055253346761068
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,128,64,2,0,0.30532266696294147
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,128,64,8,0,0.09284800291061401
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,128,64,16,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,128,64,32,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,128,64,64,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,128,64,1,0,0.6017813285191854
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,128,128,4,0,0.21811199188232422
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,128,128,8,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,128,128,16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,128,128,32,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,128,128,64,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,128,128,2,0,0.4092586835225423
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,128,128,4,0,0.21708800395329794
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,128,128,8,0,0.12082667152086894
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,128,128,2,0,0.4106239875157674
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,128,128,1,0,0.7886506716410319
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,128,128,64,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,128,128,32,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,128,128,16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,128,128,1,0,0.7871146996816
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,128,256,4,0,0.4370773235956828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,128,256,8,0,0.2362026572227478
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,128,256,16,0,0.13550933202107748
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,128,256,32,0,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,128,256,64,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,128,256,2,0,0.8313173453013102
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,128,256,4,0,0.43332799275716144
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,128,256,2,0,0.8432640234629313
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,128,256,8,0,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,128,256,32,0,0.08566932876904805
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,128,256,1,0,1.6385706265767415
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,128,256,16,0,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,128,256,64,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,256,16,4,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,256,16,2,0,0.45311466852823895
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,256,16,8,0,0.12868266304334006
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,256,16,16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,256,16,32,0,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,256,16,1,0,0.8840533097585043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,256,16,64,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,256,16,4,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,256,16,2,0,0.4510720173517863
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,256,16,8,0,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,256,16,16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,256,16,32,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,256,16,64,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,256,16,1,0,0.8850879669189453
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,256,32,4,0,0.25597866376241046
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,128,256,1,0,1.6126292546590169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,256,32,2,0,0.4916906754175822
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,256,32,8,0,0.13772799571355185
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,256,32,16,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,256,32,32,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,256,32,64,0,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,256,32,1,0,0.9668213526407877
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,256,32,4,0,0.2553226749102275
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,256,32,16,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,256,32,8,0,0.1384106675783793
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,256,32,2,0,0.4913546641667684
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,256,32,32,0,0.04846400022506714
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,256,32,64,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,256,32,1,0,0.9661493301391602
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,256,64,4,0,0.3118079900741577
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,256,64,8,0,0.16844799121220908
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,256,64,16,0,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,256,64,32,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,256,64,64,0,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,256,64,2,0,0.5983573198318481
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,256,64,4,0,0.3114666740099589
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,256,64,8,0,0.16947199900945029
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,256,64,1,0,1.1926133632659912
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,256,64,2,0,0.5992693503697714
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,256,64,64,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,256,64,32,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,256,64,16,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,256,64,1,0,1.1922826766967773
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,32,256,128,4,0,0.4191519816716512
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,16,256,128,8,0,0.22561599810918173
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,8,256,128,16,0,0.13226667046546936
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,4,256,128,32,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,2,256,128,64,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,64,256,128,2,0,0.803669293721517
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,32,256,128,4,0,0.41915734608968097
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,64,256,128,2,0,0.8016160329182943
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,16,256,128,8,0,0.22800532976786295
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,4,256,128,32,0,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,fp8,128,256,128,1,0,1.556144078572591
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,8,256,128,16,0,0.13157866398493448
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,2,256,128,64,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA GB200,mla_context,default,float16,float16,128,256,128,1,0,1.558186690012614
