framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,1,float16,float16,0,44.741292317708336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,1,fp8,fp8,0,33.12384033203125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,1,float16,fp8,0,44.98311869303385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,1,float16,float16,0,43.35377502441406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,1,float16,fp8,0,41.49094899495443
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,1,fp8,fp8,0,33.14466094970703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,1,float16,float16,0,43.88880411783854
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,1,float16,fp8,0,42.84723409016927
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,1,fp8,fp8,0,33.67388916015625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,128,1,float16,float16,0,22.895609537760418
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,1,float16,float16,0,21.84380340576172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,1,float16,fp8,0,22.019930521647137
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,128,1,float16,fp8,0,22.79747772216797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,1,fp8,fp8,0,16.806570688883465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,1,float16,float16,0,21.400746663411457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,1,float16,fp8,0,21.840726216634113
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,1,fp8,fp8,0,16.737621307373047
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,1,float16,float16,0,22.881444295247395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,1,float16,fp8,0,21.920608520507812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,1,fp8,fp8,0,16.78916295369466
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,1,float16,float16,0,10.554037094116211
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,1,float16,float16,0,10.733402252197266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,1,float16,fp8,0,11.668139139811197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,1,float16,fp8,0,10.368687947591146
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,1,fp8,fp8,0,8.626517613728842
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,1,fp8,fp8,0,8.932010650634766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,1,float16,float16,0,10.89963150024414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,1,float16,fp8,0,11.046571095784506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,1,fp8,fp8,0,8.648362477620443
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,1,float16,float16,0,10.868565877278646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,1,float16,fp8,0,11.168768564860025
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,1,fp8,fp8,0,8.65774917602539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,1,float16,float16,0,5.339306513468425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,1,float16,float16,0,5.319850603739421
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,1,float16,fp8,0,5.375146865844727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,1,fp8,fp8,0,4.638719876607259
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,1,fp8,fp8,0,4.760234514872233
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,1,float16,fp8,0,5.36729621887207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,1,float16,float16,0,5.262170791625977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,1,float16,fp8,0,5.419008255004883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,1,fp8,fp8,0,4.6204531987508135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,1,float16,float16,0,5.2019093831380205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,1,float16,fp8,0,5.258069356282552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,1,fp8,fp8,0,4.633088111877441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,1,float16,float16,0,24.73455047607422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,1,float16,fp8,0,25.299285888671875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,1,fp8,fp8,0,19.675652821858723
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,1,float16,float16,0,25.84131622314453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,1,float16,fp8,0,25.90857696533203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,1,fp8,fp8,0,19.74493916829427
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,1,float16,float16,0,25.492650349934895
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,1,float16,fp8,0,25.946795145670574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,1,fp8,fp8,0,19.771728515625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,1,float16,float16,0,13.518853505452475
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,1,float16,float16,0,12.631380716959635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,1,float16,fp8,0,13.81427256266276
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,1,fp8,fp8,0,10.511706670125326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,1,float16,fp8,0,13.118293762207031
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,1,fp8,fp8,0,10.023082733154297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,1,float16,float16,0,13.131093343098959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,1,float16,fp8,0,13.070335388183594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,1,fp8,fp8,0,10.031242370605469
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,1,float16,float16,0,12.748463948567709
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,1,float16,fp8,0,12.673370361328125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,1,fp8,fp8,0,10.047999699910482
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,1,float16,float16,0,6.204928080240886
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,1,float16,float16,0,6.252890904744466
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,1,float16,fp8,0,6.145365397135417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,1,float16,fp8,0,6.623066584269206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,1,fp8,fp8,0,5.241685231526692
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,1,fp8,fp8,0,5.462869644165039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,1,float16,float16,0,6.1561228434244795
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,1,float16,fp8,0,6.155951817830403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,1,fp8,fp8,0,5.254997253417969
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,1,float16,float16,0,6.087850570678711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,1,float16,fp8,0,6.0660050710042315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,1,fp8,fp8,0,5.26421324412028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,1,float16,float16,0,3.336714744567871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,1,float16,fp8,0,3.3749281565348306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,1,fp8,fp8,0,2.955952008565267
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,1,float16,float16,0,3.239760080973307
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,1,fp8,fp8,0,2.852522532145182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,1,float16,fp8,0,3.1817334493001304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,1,float16,float16,0,3.1807146072387695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,1,float16,fp8,0,3.1882241566975913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,1,fp8,fp8,0,2.850133260091146
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,1,float16,float16,0,3.186181386311849
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,1,float16,fp8,0,3.1933441162109375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,1,fp8,fp8,0,2.8555946350097656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,1,float16,float16,0,18.02137629191081
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,1,float16,fp8,0,18.182484944661457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,1,fp8,fp8,0,14.305109659830729
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,1,float16,float16,0,18.230613708496094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,1,float16,fp8,0,18.91429392496745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,1,fp8,fp8,0,14.360064188639322
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,1,float16,float16,0,18.582693735758465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,1,float16,fp8,0,18.379605611165363
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,1,fp8,fp8,0,14.378837585449219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,1,float16,float16,0,9.85976537068685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,1,float16,float16,0,8.90555191040039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,1,float16,fp8,0,9.015813191731771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,1,fp8,fp8,0,7.324160257975261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,1,float16,fp8,0,9.748992284138998
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,1,fp8,fp8,0,7.725050608317058
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,1,float16,float16,0,8.909482955932617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,1,float16,fp8,0,9.024176279703775
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,1,fp8,fp8,0,7.340373357137044
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,1,float16,float16,0,9.076735814412435
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,1,float16,fp8,0,9.215994517008463
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,1,fp8,fp8,0,7.3477121988932295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,1,float16,float16,0,4.539562543233235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,1,float16,float16,0,4.419072151184082
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,1,float16,fp8,0,4.514314651489258
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,1,fp8,fp8,0,3.8521172205607095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,1,fp8,fp8,0,4.037472089131673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,1,float16,fp8,0,4.682245254516602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,1,float16,float16,0,4.431866645812988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,1,float16,fp8,0,4.355072021484375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,1,fp8,fp8,0,3.860992113749186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,1,float16,float16,0,4.434432029724121
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,1,float16,fp8,0,4.349610646565755
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,1,fp8,fp8,0,3.8715734481811523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,1,float16,float16,0,2.4690346717834473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,1,float16,fp8,0,2.496682643890381
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,1,float16,float16,0,2.394965330759684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,1,fp8,fp8,0,2.205018679300944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,1,float16,fp8,0,2.3734560012817383
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,1,fp8,fp8,0,2.118314743041992
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,1,float16,float16,0,2.3690187136332193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,1,float16,fp8,0,2.376522699991862
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,1,fp8,fp8,0,2.1220693588256836
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,1,float16,float16,0,2.378917376200358
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,1,float16,fp8,0,2.4024693171183267
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,1,fp8,fp8,0,2.12718931833903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,1,float16,float16,0,24.787317911783855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,1,float16,fp8,0,25.09124247233073
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,1,fp8,fp8,0,19.416239420572918
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,1,float16,float16,0,25.320106506347656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,1,float16,fp8,0,24.197797139485676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,1,fp8,fp8,0,19.42425537109375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,1,float16,float16,0,25.435818990071613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,1,float16,fp8,0,24.808619181315105
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,1,fp8,fp8,0,19.51470947265625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,128,1,float16,float16,0,13.903695424397787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,1,float16,float16,0,11.744773864746094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,128,1,float16,fp8,0,13.063685099283854
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,1,float16,fp8,0,12.27520497639974
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,1,fp8,fp8,0,9.786709467569986
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,1,float16,float16,0,12.559706370035807
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,1,float16,fp8,0,12.07757314046224
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,1,fp8,fp8,0,9.805482864379883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,1,float16,float16,0,12.352682749430338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,1,float16,fp8,0,11.990186055501303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,1,fp8,fp8,0,9.838421503702799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,1,float16,float16,0,5.610837300618489
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,1,float16,float16,0,6.1158402760823565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,1,float16,fp8,0,5.989205042521159
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,1,fp8,fp8,0,5.357744216918945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,1,float16,fp8,0,5.90660285949707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,1,fp8,fp8,0,5.042517344156901
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,1,float16,float16,0,5.624149322509766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,1,float16,fp8,0,5.7221120198567705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,1,fp8,fp8,0,5.05241584777832
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,1,float16,float16,0,5.8832213083903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,1,float16,fp8,0,5.726378758748372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,1,fp8,fp8,0,5.069482803344727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,1,float16,float16,0,3.166378657023112
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,1,float16,float16,0,2.9895734786987305
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,1,float16,fp8,0,2.9583358764648438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,1,fp8,fp8,0,2.6712640126546225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,1,float16,fp8,0,3.1994880040486655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,1,fp8,fp8,0,2.8224852879842124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,1,float16,float16,0,2.9987732569376626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,1,float16,fp8,0,3.0059518814086914
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,1,fp8,fp8,0,2.681856155395508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,1,float16,float16,0,3.009370803833008
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,1,float16,fp8,0,3.0100479125976562
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,1,fp8,fp8,0,2.6832052866617837
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,1,float16,float16,0,1.7641812960306804
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,1,float16,fp8,0,1.7341440518697102
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,1,fp8,fp8,0,1.573893388112386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,1,float16,float16,0,1.6940266291300456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,1,float16,fp8,0,1.6783359845479329
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,1,fp8,fp8,0,1.5049386024475098
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,1,float16,float16,0,1.673898696899414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,1,float16,fp8,0,1.6715092658996582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,1,fp8,fp8,0,1.5028907457987468
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,1,float16,float16,0,1.7218559583028157
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,1,float16,fp8,0,1.712981383005778
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,1,fp8,fp8,0,1.509376049041748
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,1,float16,float16,0,14.71624501546224
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,1,float16,fp8,0,14.797653198242188
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,1,fp8,fp8,0,12.088826497395834
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,1,float16,float16,0,14.167898813883463
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,1,float16,fp8,0,14.439076741536459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,1,fp8,fp8,0,12.099413553873697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,1,float16,float16,0,14.451541900634766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,1,float16,fp8,0,15.070378621419271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,1,fp8,fp8,0,12.146517435709635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,1,float16,float16,0,7.619071960449219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,1,float16,float16,0,6.896298726399739
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,1,float16,fp8,0,7.696053187052409
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,1,float16,fp8,0,6.875477472941081
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,1,fp8,fp8,0,6.127440134684245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,1,fp8,fp8,0,6.610815684000651
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,1,float16,float16,0,7.257434844970703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,1,float16,fp8,0,6.7908376057942705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,1,fp8,fp8,0,6.171477635701497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,1,float16,float16,0,7.308458964029948
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,1,float16,fp8,0,6.939477284749349
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,1,fp8,fp8,0,6.161242802937825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,1,float16,float16,0,3.483306566874186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,1,float16,float16,0,3.7850399017333984
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,1,float16,fp8,0,3.789653460184733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,1,float16,fp8,0,3.479893366495768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,1,fp8,fp8,0,3.413680076599121
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,1,fp8,fp8,0,3.1766185760498047
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,1,float16,float16,0,3.5566933949788413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,1,float16,fp8,0,3.5710293451944985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,1,fp8,fp8,0,3.1800320943196616
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,1,float16,float16,0,3.557375907897949
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,1,float16,fp8,0,3.5833279291788735
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,1,fp8,fp8,0,3.196415901184082
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,1,float16,float16,0,1.9968105951944988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,1,float16,fp8,0,2.0305919647216797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,1,float16,float16,0,1.9061867396036785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,1,float16,fp8,0,1.876479943593343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,1,fp8,fp8,0,1.8187840779622395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,1,fp8,fp8,0,1.7095680236816406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,1,float16,float16,0,1.858730634053548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,1,float16,fp8,0,1.8665812810262044
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,1,fp8,fp8,0,1.7095786730448406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,1,float16,float16,0,1.8665812810262044
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,1,float16,fp8,0,1.877845287322998
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,1,fp8,fp8,0,1.714687983194987
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,1,float16,float16,0,1.1221333344777424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,1,float16,fp8,0,1.1088213125864665
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,1,fp8,fp8,0,1.0275839964548747
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,1,float16,float16,0,1.0821973482767742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,1,float16,fp8,0,1.0828693707784016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,1,fp8,fp8,0,0.9763840039571127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,1,float16,float16,0,1.0927733580271404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,1,float16,fp8,0,1.0651360352834065
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,1,fp8,fp8,0,0.9757013320922852
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,1,float16,float16,0,1.0746880372365315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,1,float16,fp8,0,1.0996106465657551
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,1,fp8,fp8,0,0.97979736328125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,1,float16,float16,0,15.172779083251953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,1,float16,fp8,0,14.842032114664713
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,1,fp8,fp8,0,12.962987263997396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,1,float16,float16,0,15.549781799316406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,1,float16,fp8,0,15.007914225260416
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,1,fp8,fp8,0,13.003946940104166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,1,float16,float16,0,14.876324971516928
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,1,float16,fp8,0,14.773765563964844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,1,fp8,fp8,0,13.121194203694662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,1,float16,float16,0,7.033173243204753
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,128,1,float16,float16,0,7.781375885009766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,128,1,float16,fp8,0,8.02402114868164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,1,float16,fp8,0,7.01031494140625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,1,fp8,fp8,0,6.494037628173828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,1,float16,float16,0,7.035909016927083
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,1,float16,fp8,0,7.058432261149089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,1,fp8,fp8,0,6.508544286092122
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,1,float16,float16,0,7.034074783325195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,1,float16,fp8,0,7.416666666666667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,1,fp8,fp8,0,6.548816045125325
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,1,float16,float16,0,3.5604480107625327
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,1,float16,float16,0,3.8640638987223306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,1,float16,fp8,0,3.9645865758260093
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,1,fp8,fp8,0,3.650218645731608
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,1,float16,fp8,0,3.5631786982218423
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,1,fp8,fp8,0,3.3235626220703125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,1,float16,float16,0,3.5915091832478843
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,1,float16,fp8,0,3.590826670328776
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,1,fp8,fp8,0,3.3382399876912436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,1,float16,float16,0,3.6106239954630532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,1,float16,fp8,0,3.6157439549764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,1,fp8,fp8,0,3.3542826970418296
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,1,float16,float16,0,2.00602134068807
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,1,float16,float16,0,1.8653866449991863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,1,float16,fp8,0,2.1009066899617515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,1,float16,fp8,0,1.8908106486002605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,1,fp8,fp8,0,1.7450666427612305
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,1,fp8,fp8,0,1.8990079561869304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,1,float16,float16,0,1.881600062052409
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,1,float16,fp8,0,1.9198293685913086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,1,fp8,fp8,0,1.7546292940775554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,1,float16,float16,0,1.9082239468892415
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,1,float16,fp8,0,1.894917329152425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,1,fp8,fp8,0,1.7628159523010254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,1,float16,float16,0,1.1129173437754314
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,1,float16,fp8,0,1.1279359658559163
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,1,fp8,fp8,0,1.0344106356302898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,1,float16,float16,0,1.0477226575215657
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,1,float16,fp8,0,1.0268959999084473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,1,fp8,fp8,0,0.960693359375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,1,float16,float16,0,1.0579626560211182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,1,float16,fp8,0,1.0610240300496419
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,1,fp8,fp8,0,0.9613653024037679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,1,float16,float16,0,1.0589866638183594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,1,float16,fp8,0,1.0637653668721516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,1,fp8,fp8,0,0.965119997660319
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,1,float16,float16,0,0.6456319888432821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,1,float16,fp8,0,0.655189315478007
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,1,fp8,fp8,0,0.5990346670150757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,1,float16,float16,0,0.6174720128377279
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,1,float16,fp8,0,0.6198666493097941
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,1,fp8,fp8,0,0.5642240047454834
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,1,float16,float16,0,0.6089386542638143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,1,float16,fp8,0,0.6106453339258829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,1,fp8,fp8,0,0.5628693501154581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,1,float16,float16,0,0.6109866698582967
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,1,float16,fp8,0,0.6137173175811768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,1,fp8,fp8,0,0.5659306844075521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,1,float16,float16,0,9.233919779459635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,1,float16,fp8,0,9.255765279134115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,1,fp8,fp8,0,8.678234736124674
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,1,float16,float16,0,9.472506841023764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,1,float16,fp8,0,9.305765151977539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,1,fp8,fp8,0,8.71066157023112
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,1,float16,float16,0,9.43837865193685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,1,float16,fp8,0,9.43121083577474
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,1,fp8,fp8,0,8.770730972290039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,1,float16,float16,0,5.017087936401367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,1,float16,float16,0,4.635818799336751
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,1,float16,fp8,0,4.59605344136556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,1,fp8,fp8,0,4.382549285888672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,1,float16,fp8,0,5.08246390024821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,1,fp8,fp8,0,4.828336079915364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,1,float16,float16,0,4.6170454025268555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,1,float16,fp8,0,4.6105600992838545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,1,fp8,fp8,0,4.385626792907715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,1,float16,float16,0,4.632063865661621
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,1,float16,fp8,0,4.630698521931966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,1,fp8,fp8,0,4.401317278544108
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,1,float16,float16,0,2.3652745882670083
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,1,float16,float16,0,2.5553919474283853
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,1,float16,fp8,0,2.366975943247477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,1,fp8,fp8,0,2.4901973406473794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,1,float16,fp8,0,2.594645341237386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,1,fp8,fp8,0,2.2464906374613443
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,1,float16,float16,0,2.375509262084961
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,1,float16,fp8,0,2.3734614054361978
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,1,fp8,fp8,0,2.2543360392252603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,1,float16,float16,0,2.3833600680033364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,1,float16,fp8,0,2.387455940246582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,1,fp8,fp8,0,2.2809653282165527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,1,float16,float16,0,1.3407573699951172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,1,float16,fp8,0,1.362602710723877
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,1,float16,float16,0,1.2706133524576824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,1,fp8,fp8,0,1.3040640354156494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,1,float16,fp8,0,1.2682240009307861
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,1,fp8,fp8,0,1.1868159770965576
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,1,float16,float16,0,1.2726613680521648
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,1,float16,fp8,0,1.2682240009307861
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,1,fp8,fp8,0,1.1960266431172688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,1,float16,float16,0,1.2706133524576824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,1,float16,fp8,0,1.2852853139241536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,1,fp8,fp8,0,1.1943199634552002
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,1,float16,float16,0,0.7529866695404053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,1,float16,fp8,0,0.7577173709869385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,1,fp8,fp8,0,0.7164533138275146
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,1,float16,float16,0,0.7089440027872721
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,1,float16,fp8,0,0.7086133162180582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,1,fp8,fp8,0,0.6611520051956177
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,1,float16,float16,0,0.7062186400095621
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,1,float16,fp8,0,0.7017866770426432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,1,fp8,fp8,0,0.6625333229700724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,1,float16,float16,0,0.7072479724884033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,1,float16,fp8,0,0.7089493274688721
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,1,fp8,fp8,0,0.6655999819437662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,1,float16,float16,0,0.4411733150482178
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,1,float16,fp8,0,0.44970134894053143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,1,fp8,fp8,0,0.4227360089619954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,1,float16,float16,0,0.41881601015726727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,1,float16,fp8,0,0.41881601015726727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,1,fp8,fp8,0,0.3949226538340251
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,1,float16,float16,0,0.4220586617787679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,1,float16,fp8,0,0.4237706661224365
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,1,fp8,fp8,0,0.3956000010172526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,1,float16,float16,0,0.4254719813664754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,1,float16,fp8,0,0.4271786610285441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,1,fp8,fp8,0,0.3997013171513875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,1,float16,float16,0,10.605215708414713
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,1,float16,fp8,0,10.631338755289713
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,1,fp8,fp8,0,10.380287806193033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,1,float16,float16,0,10.618368148803711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,1,float16,fp8,0,10.631680170694986
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,1,fp8,fp8,0,10.431146621704102
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,1,float16,float16,0,10.658304214477539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,1,float16,fp8,0,10.66001065572103
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,1,fp8,fp8,0,10.53218142191569
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,1,float16,float16,0,5.294591903686523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,128,1,float16,float16,0,5.771605173746745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,128,1,float16,fp8,0,5.8199036916097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,1,float16,fp8,0,5.292885462443034
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,1,fp8,fp8,0,5.174442609151204
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,1,float16,float16,0,5.282304128011067
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,1,float16,fp8,0,5.287082672119141
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,1,fp8,fp8,0,5.208234786987305
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,1,float16,float16,0,5.310981432596843
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,1,float16,fp8,0,5.31711991628011
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,1,fp8,fp8,0,5.217450777689616
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,1,float16,float16,0,2.924031893412272
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,1,float16,fp8,0,2.9487787882486978
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,1,float16,float16,0,2.6671787897745767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,1,fp8,fp8,0,2.944682757059733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,1,float16,fp8,0,2.689023971557617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,1,fp8,fp8,0,2.623487949371338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,1,float16,float16,0,2.69380251566569
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,1,float16,fp8,0,2.6825386683146157
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,1,fp8,fp8,0,2.6333866119384766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,1,float16,float16,0,2.69926389058431
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,1,float16,fp8,0,2.693120002746582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,1,fp8,fp8,0,2.6480639775594077
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,1,float16,float16,0,1.5076692899068196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,1,float16,float16,0,1.3899146715799968
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,1,float16,fp8,0,1.3892265955607097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,1,float16,fp8,0,1.530186653137207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,1,fp8,fp8,0,1.3666987419128418
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,1,fp8,fp8,0,1.5213227272033691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,1,float16,float16,0,1.3984427452087402
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,1,float16,fp8,0,1.3998079299926758
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,1,fp8,fp8,0,1.3677226702372234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,1,float16,float16,0,1.405951976776123
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,1,float16,fp8,0,1.3987785975138347
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,1,fp8,fp8,0,1.3806932767232258
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,1,float16,float16,0,0.8036800225575765
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,1,float16,fp8,0,0.8132266998291016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,1,fp8,fp8,0,0.8046987056732178
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,1,float16,float16,0,0.7437600294748942
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,1,float16,fp8,0,0.7430826822916666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,1,fp8,fp8,0,0.726698637008667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,1,float16,float16,0,0.7458186944325765
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,1,float16,fp8,0,0.7475199699401855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,1,fp8,fp8,0,0.7277226448059082
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,1,float16,float16,0,0.7505866686503092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,1,float16,fp8,0,0.7512799898783366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,1,fp8,fp8,0,0.731818675994873
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,1,float16,float16,0,0.4500480095545451
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,1,float16,fp8,0,0.45823466777801514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,1,fp8,fp8,0,0.4455999930699666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,1,float16,float16,0,0.41812801361083984
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,1,float16,fp8,0,0.41915734608968097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,1,fp8,fp8,0,0.4078933397928874
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,1,float16,float16,0,0.419866681098938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,1,float16,fp8,0,0.4217280149459839
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,1,fp8,fp8,0,0.41096532344818115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,1,float16,float16,0,0.4230826695760091
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,1,float16,fp8,0,0.4251306851704915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,1,fp8,fp8,0,0.41232534249623615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,1,float16,float16,0,0.27187200387318927
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,1,float16,fp8,0,0.2783573269844055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,1,fp8,fp8,0,0.26709334055582684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,1,float16,float16,0,0.2515626748402913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,1,float16,fp8,0,0.2525866627693176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,1,fp8,fp8,0,0.2474613388379415
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,1,float16,float16,0,0.2488266626993815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,1,float16,fp8,0,0.2501973311106364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,1,fp8,fp8,0,0.24541866779327393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,1,float16,float16,0,0.25088000297546387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,1,float16,fp8,0,0.25361067056655884
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,1,fp8,fp8,0,0.24848000208536783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,1,float16,float16,0,7.303194681803386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,1,float16,fp8,0,7.305733362833659
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,1,fp8,fp8,0,7.33952522277832
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,1,float16,float16,0,7.333375930786133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,1,float16,fp8,0,7.332010904947917
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,1,fp8,fp8,0,7.37059211730957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,1,float16,float16,0,7.3729705810546875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,1,float16,fp8,0,7.371599833170573
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,1,fp8,fp8,0,7.425194422403972
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,1,float16,float16,0,4.022442817687988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,1,float16,float16,0,3.662165323893229
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,1,float16,fp8,0,4.06546147664388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,1,float16,fp8,0,3.647141456604004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,1,fp8,fp8,0,3.6759894688924155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,1,fp8,fp8,0,4.1291147867838545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,1,float16,float16,0,3.663189252217611
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,1,float16,fp8,0,3.6679681142171225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,1,fp8,fp8,0,3.6879361470540366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,1,float16,float16,0,3.6845226287841797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,1,float16,fp8,0,3.6875839233398438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,1,fp8,fp8,0,3.7135467529296875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,1,float16,float16,0,2.0435733795166016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,1,float16,float16,0,1.8649333318074544
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,1,float16,fp8,0,1.87118927637736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,1,fp8,fp8,0,1.8773333231608074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,1,float16,fp8,0,2.073941389719645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,1,fp8,fp8,0,2.100224018096924
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,1,float16,float16,0,1.8688000043233235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,1,float16,fp8,0,1.8725600242614746
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,1,fp8,fp8,0,1.8800640106201172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,1,float16,float16,0,1.87391996383667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,1,float16,fp8,0,1.8769920667012532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,1,fp8,fp8,0,1.8894507090250652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,1,float16,float16,0,1.0586453278859456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,1,float16,fp8,0,1.0794666608174641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,1,fp8,fp8,0,0.9777546723683676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,1,float16,float16,0,0.9787733554840088
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,1,float16,fp8,0,0.9784320195515951
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,1,fp8,fp8,0,1.0927786827087402
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,1,float16,float16,0,0.9780906836191813
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,1,float16,fp8,0,0.9787733554840088
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,1,fp8,fp8,0,0.9821866353352865
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,1,float16,float16,0,0.981503963470459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,1,float16,fp8,0,0.98798934618632
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,1,fp8,fp8,0,0.9907200336456299
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,1,float16,float16,0,0.5693440039952596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,1,float16,fp8,0,0.5799253384272257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,1,fp8,fp8,0,0.583679993947347
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,1,float16,float16,0,0.5263359944025675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,1,float16,fp8,0,0.5259679953257242
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,1,fp8,fp8,0,0.5276960134506226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,1,float16,float16,0,0.5259946584701538
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,1,float16,fp8,0,0.5280426740646362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,1,fp8,fp8,0,0.5287253459294637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,1,float16,float16,0,0.5304319858551025
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,1,float16,fp8,0,0.53111465771993
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,1,fp8,fp8,0,0.5317973295847574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,1,float16,float16,0,0.3237546682357788
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,1,float16,fp8,0,0.3295573393503825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,1,fp8,fp8,0,0.32785600423812866
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,1,float16,float16,0,0.29713066418965656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,1,float16,fp8,0,0.2988426685333252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,1,fp8,fp8,0,0.2995199958483378
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,1,float16,float16,0,0.29849066336949664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,1,float16,fp8,0,0.30053333441416424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,1,fp8,fp8,0,0.3022506634394328
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,1,float16,float16,0,0.30190932750701904
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,1,float16,fp8,0,0.303274671236674
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,1,fp8,fp8,0,0.30429865916570026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,1,float16,float16,0,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,1,float16,fp8,0,0.20548266172409058
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,1,fp8,fp8,0,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,1,float16,float16,0,0.1848319967587789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,1,float16,fp8,0,0.1858560045560201
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,1,fp8,fp8,0,0.18244266510009766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,1,float16,float16,0,0.18141865730285645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,1,float16,fp8,0,0.18449066082636514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,1,fp8,fp8,0,0.18141865730285645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,1,float16,float16,0,0.18210667371749878
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,1,float16,fp8,0,0.1848319967587789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,1,fp8,fp8,0,0.18464533487955728
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,1,float16,float16,0,8.396959940592447
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,1,float16,fp8,0,8.36573855082194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,1,fp8,fp8,0,8.8362668355306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,1,float16,float16,0,8.449706395467123
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,1,float16,fp8,0,8.495786666870117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,1,fp8,fp8,0,9.237509409586588
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,1,float16,float16,0,8.550912221272787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,1,float16,fp8,0,8.51421864827474
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,1,fp8,fp8,0,9.278122584025065
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,1,float16,float16,0,4.086789449055989
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,128,1,float16,float16,0,4.719104131062825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,128,1,float16,fp8,0,4.62660280863444
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,1,float16,fp8,0,4.068357467651367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,1,fp8,fp8,0,4.4301652908325195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,1,float16,float16,0,4.145834604899089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,1,float16,fp8,0,4.180639902750651
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,1,fp8,fp8,0,4.6059573491414385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,1,float16,float16,0,4.25164794921875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,1,float16,fp8,0,4.262570699055989
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,1,fp8,fp8,0,4.584959983825684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,1,float16,float16,0,2.344618638356527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,1,float16,fp8,0,2.2939252853393555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,1,fp8,fp8,0,2.4506026903788247
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,1,float16,float16,0,2.0497066179911294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,1,float16,fp8,0,2.0592640240987143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,1,fp8,fp8,0,2.184528032938639
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,1,float16,float16,0,2.0674452781677246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,1,float16,fp8,0,2.0626559257507324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,1,fp8,fp8,0,2.308095932006836
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,1,float16,float16,0,2.0818079312642417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,1,float16,fp8,0,2.0831573804219565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,1,fp8,fp8,0,2.2840320269266763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,1,float16,float16,0,1.1692372957865398
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,1,float16,float16,0,1.046015977859497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,1,float16,fp8,0,1.047381321589152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,1,float16,fp8,0,1.1504639784495037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,1,fp8,fp8,0,1.2311893304189045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,1,fp8,fp8,0,1.0866399606068928
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,1,float16,float16,0,1.0473919709523518
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,1,float16,fp8,0,1.0511306921641033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,1,fp8,fp8,0,1.1122346719106038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,1,float16,float16,0,1.0589866638183594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,1,float16,fp8,0,1.0592479705810547
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,1,fp8,fp8,0,1.1173546314239502
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,1,float16,float16,0,0.5993813276290894
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,1,float16,fp8,0,0.587775985399882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,1,fp8,fp8,0,0.6307839949925741
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,1,float16,float16,0,0.5358933210372925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,1,float16,fp8,0,0.5376000006993612
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,1,fp8,fp8,0,0.556714653968811
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,1,float16,float16,0,0.5362346569697062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,1,float16,fp8,0,0.535210649172465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,1,fp8,fp8,0,0.559445341428121
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,1,float16,float16,0,0.5403253237406412
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,1,float16,fp8,0,0.5399893522262573
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,1,fp8,fp8,0,0.5683199961980184
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,1,float16,float16,0,0.3131733338038127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,1,float16,fp8,0,0.30770132939020794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,1,fp8,fp8,0,0.32819199562072754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,1,float16,float16,0,0.2821120023727417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,1,float16,fp8,0,0.2821120023727417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,1,fp8,fp8,0,0.2903040051460266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,1,float16,float16,0,0.2821120023727417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,1,float16,fp8,0,0.2845013340314229
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,1,fp8,fp8,0,0.2923520008722941
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,1,float16,float16,0,0.2855253418286641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,1,float16,fp8,0,0.28517866134643555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,1,fp8,fp8,0,0.29713066418965656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,1,float16,float16,0,0.1716853380203247
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,1,float16,fp8,0,0.16980799039204916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,1,fp8,fp8,0,0.17851734161376953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,1,float16,float16,0,0.1520639955997467
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,1,float16,fp8,0,0.15035733580589294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,1,fp8,fp8,0,0.15496533115704855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,1,float16,float16,0,0.15103999773661295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,1,float16,fp8,0,0.15001599987347922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,1,fp8,fp8,0,0.15428266922632852
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,1,float16,float16,0,0.15222932895024618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,1,float16,fp8,0,0.15307733416557312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,1,fp8,fp8,0,0.15889066457748413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,1,float16,float16,0,0.10000000397364299
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,1,float16,fp8,0,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,1,fp8,fp8,0,0.10239467024803162
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,1,float16,float16,0,0.08738133311271667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,1,float16,fp8,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,1,float16,float16,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,1,float16,fp8,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,1,fp8,fp8,0,0.08738666772842407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,1,float16,float16,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,1,fp8,fp8,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,1,float16,float16,0,7.4415842692057295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,1,float16,fp8,0,7.359312057495117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,1,fp8,fp8,0,7.91978645324707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,1,float16,float16,0,7.472645441691081
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,1,float16,fp8,0,7.459327697753906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,1,fp8,fp8,0,8.297813415527344
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,1,float16,fp8,0,7.547903696695964
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,1,float16,float16,0,7.552853266398112
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,1,fp8,fp8,0,8.275626500447592
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,1,float16,float16,0,3.608234723409017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,128,1,float16,float16,0,4.219903945922852
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,128,1,float16,fp8,0,4.121941248575847
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,1,fp8,fp8,0,3.9563945134480796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,1,float16,fp8,0,3.6031147638956704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,1,float16,float16,0,3.6729227701822915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,1,float16,fp8,0,3.6865708033243814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,1,fp8,fp8,0,4.139349301656087
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,1,float16,float16,0,3.7667840321858725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,1,float16,fp8,0,3.7742932637532554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,1,fp8,fp8,0,4.116138776143392
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,1,float16,float16,0,1.8143572807312012
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,1,float16,float16,0,2.110976060231527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,1,float16,fp8,0,2.0493599573771157
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,1,float16,fp8,0,1.8109439214070637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,1,fp8,fp8,0,2.2111573219299316
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,1,fp8,fp8,0,1.9293866157531738
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,1,float16,float16,0,1.8191253344217937
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,1,float16,fp8,0,1.819818655649821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,1,fp8,fp8,0,2.063701311747233
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,1,float16,float16,0,1.8413227399190266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,1,float16,fp8,0,1.8387573560078938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,1,fp8,fp8,0,2.035711924235026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,1,float16,float16,0,1.0395306746164958
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,1,float16,fp8,0,1.0251946449279785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,1,float16,float16,0,0.9181866645812988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,1,float16,fp8,0,0.9171626567840576
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,1,fp8,fp8,0,0.9767146905263265
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,1,fp8,fp8,0,1.1166773637135823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,1,float16,float16,0,0.9227946599324545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,1,float16,fp8,0,0.9207572937011719
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,1,fp8,fp8,0,0.9883306821187338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,1,float16,float16,0,0.9320106506347656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,1,float16,fp8,0,0.9306453069051107
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,1,fp8,fp8,0,1.005738655726115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,1,float16,float16,0,0.532480001449585
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,1,float16,fp8,0,0.5234346787134806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,1,fp8,fp8,0,0.5720746517181396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,1,float16,float16,0,0.4708746671676636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,1,float16,fp8,0,0.4729173183441162
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,1,fp8,fp8,0,0.49885865052541095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,1,float16,float16,0,0.4753066698710124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,1,float16,fp8,0,0.47496533393859863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,1,fp8,fp8,0,0.5015893379847208
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,1,float16,float16,0,0.4787199894587199
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,1,float16,fp8,0,0.4776959816614787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,1,fp8,fp8,0,0.5104639927546183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,1,float16,float16,0,0.2821120023727417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,1,float16,fp8,0,0.27562665939331055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,1,fp8,fp8,0,0.2984960079193115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,1,float16,float16,0,0.24883200724919638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,1,float16,fp8,0,0.24849067131678262
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,1,fp8,fp8,0,0.2604373296101888
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,1,float16,float16,0,0.24849067131678262
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,1,float16,fp8,0,0.24644800027211508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,1,fp8,fp8,0,0.2611200014750163
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,1,float16,float16,0,0.24985599517822266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,1,float16,fp8,0,0.2501973311106364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,1,fp8,fp8,0,0.26606400807698566
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,1,float16,float16,0,0.15205867091814676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,1,float16,fp8,0,0.1479680041472117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,1,fp8,fp8,0,0.1616213321685791
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,1,float16,float16,0,0.13226667046546936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,1,float16,fp8,0,0.13226667046546936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,1,fp8,fp8,0,0.13942933082580566
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,1,float16,float16,0,0.13209600249926248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,1,float16,fp8,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,1,fp8,fp8,0,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,1,float16,float16,0,0.13431466619173685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,1,float16,fp8,0,0.13414399822553
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,1,fp8,fp8,0,0.14199999968210855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,1,float16,float16,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,1,float16,fp8,0,0.08499200145403545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,1,fp8,fp8,0,0.0942080020904541
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,1,float16,float16,0,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,1,float16,fp8,0,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,1,fp8,fp8,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,1,float16,float16,0,0.07543999950091045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,1,float16,fp8,0,0.07611200213432312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,1,fp8,fp8,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,1,float16,float16,0,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,1,fp8,fp8,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,1,float16,float16,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,1,float16,fp8,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,1,fp8,fp8,0,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,1,float16,float16,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,1,float16,float16,0,0.04814933240413666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,1,float16,fp8,0,0.04845866560935974
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,1,float16,float16,0,2.721109390258789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,1,float16,fp8,0,2.721791903177897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,1,fp8,fp8,0,2.9271039962768555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,1,float16,float16,0,2.811903953552246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,1,float16,fp8,0,2.8207785288492837
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,1,fp8,fp8,0,3.094357490539551
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,1,float16,float16,0,2.855600039164225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,1,float16,fp8,0,2.8746986389160156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,1,fp8,fp8,0,3.091279983520508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,1,float16,float16,0,1.3875199953715007
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,1,float16,float16,0,1.6614452997843425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,1,float16,fp8,0,1.3707946141560872
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,1,float16,fp8,0,1.6139893531799316
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,1,fp8,fp8,0,1.6964267094930012
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,1,fp8,fp8,0,1.4216480255126953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,1,float16,float16,0,1.395370642344157
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,1,float16,fp8,0,1.3875253995259602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,1,fp8,fp8,0,1.5476053555806477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,1,float16,float16,0,1.422335942586263
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,1,float16,fp8,0,1.4137919743855794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,1,fp8,fp8,0,1.5315574010213215
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,1,float16,float16,0,0.8388266563415527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,1,float16,float16,0,0.7021173636118571
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,1,float16,fp8,0,0.8166399796803793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,1,float16,fp8,0,0.7051946322123209
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,1,fp8,fp8,0,0.7140693664550781
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,1,fp8,fp8,0,0.8581120173136393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,1,float16,float16,0,0.7069013118743896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,1,float16,fp8,0,0.7075786590576172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,1,fp8,fp8,0,0.7222613493601481
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,1,float16,float16,0,0.7157759666442871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,1,float16,fp8,0,0.7144266764322916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,1,fp8,fp8,0,0.7427413463592529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,1,float16,float16,0,0.43093331654866535
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,1,float16,fp8,0,0.42098132769266766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,1,fp8,fp8,0,0.44014934698740643
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,1,float16,float16,0,0.3647093375523885
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,1,float16,fp8,0,0.365392009417216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,1,fp8,fp8,0,0.3691573143005371
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,1,float16,float16,0,0.36744534969329834
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,1,float16,fp8,0,0.3671040137608846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,1,fp8,fp8,0,0.36847468217213947
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,1,float16,float16,0,0.37085866928100586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,1,float16,fp8,0,0.3715306520462036
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,1,fp8,fp8,0,0.37939198811848956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,1,float16,float16,0,0.2307466665903727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,1,float16,fp8,0,0.223578671614329
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,1,fp8,fp8,0,0.2362026572227478
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,1,float16,float16,0,0.1938719948132833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,1,float16,fp8,0,0.19404266277949014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,1,fp8,fp8,0,0.19558932383855185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,1,float16,float16,0,0.1950719952583313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,1,float16,fp8,0,0.19575466712315878
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,1,fp8,fp8,0,0.1960960030555725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,1,float16,float16,0,0.1971199909845988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,1,float16,fp8,0,0.19848533471425375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,1,fp8,fp8,0,0.20002132654190063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,1,float16,float16,0,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,1,float16,fp8,0,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,1,fp8,fp8,0,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,1,float16,float16,0,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,1,float16,fp8,0,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,1,fp8,fp8,0,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,1,float16,float16,0,0.10889599720637004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,1,float16,fp8,0,0.10956799983978271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,1,fp8,fp8,0,0.10820266604423523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,1,float16,float16,0,0.11059733231862386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,1,fp8,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,1,float16,float16,0,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,1,float16,fp8,0,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,1,fp8,fp8,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,1,float16,float16,0,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,1,float16,fp8,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,1,fp8,fp8,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,1,float16,float16,0,0.06314133107662201
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,1,float16,fp8,0,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,1,fp8,fp8,0,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,1,fp8,fp8,0,0.06247466802597046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,1,float16,float16,0,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,1,float16,fp8,0,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,1,fp8,fp8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,1,float16,float16,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,1,float16,float16,0,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,1,fp8,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,1,float16,float16,0,1.46670929590861
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,1,float16,fp8,0,1.4644907315572102
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,1,fp8,fp8,0,1.4766079584757488
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,1,float16,float16,0,1.480021317799886
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,1,float16,fp8,0,1.4738772710164387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,1,fp8,fp8,0,1.6245706876118977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,1,float16,float16,0,1.5175679524739583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,1,float16,fp8,0,1.497429370880127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,1,fp8,fp8,0,1.5856639544169109
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,1,float16,float16,0,0.8809813658396403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,1,float16,fp8,0,0.854698657989502
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,1,fp8,fp8,0,0.897706667582194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,1,float16,float16,0,0.7441066900889078
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,1,float16,fp8,0,0.7393279870351156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,1,fp8,fp8,0,0.7488853136698405
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,1,float16,float16,0,0.7509333292643229
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,1,float16,fp8,0,0.7505973180135092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,1,fp8,fp8,0,0.7799519697825114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,1,float16,float16,0,0.7611680030822754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,1,float16,fp8,0,0.7577546437581381
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,1,fp8,fp8,0,0.7758560180664062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,1,float16,float16,0,0.4514186779658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,1,float16,fp8,0,0.4411733150482178
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,1,fp8,fp8,0,0.4599466721216838
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,1,float16,float16,0,0.3862186670303345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,1,float16,fp8,0,0.38178133964538574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,1,fp8,fp8,0,0.38656000296274823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,1,float16,float16,0,0.38280534744262695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,1,float16,fp8,0,0.3831466833750407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,1,fp8,fp8,0,0.38757868607838947
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,1,float16,float16,0,0.39031465848286945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,1,float16,fp8,0,0.3882666826248169
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,1,fp8,fp8,0,0.39867734909057617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,1,float16,float16,0,0.23654399315516153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,1,float16,fp8,0,0.23109867175420126
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,1,fp8,fp8,0,0.24302933613459268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,1,float16,float16,0,0.20189867417017618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,1,float16,fp8,0,0.20189332962036133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,1,fp8,fp8,0,0.20292266209920248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,1,float16,float16,0,0.2034506599108378
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,1,float16,fp8,0,0.2034346659978231
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,1,fp8,fp8,0,0.20462934176127115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,1,float16,float16,0,0.20736000935236612
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,1,float16,fp8,0,0.20616533358891806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,1,fp8,fp8,0,0.21042666832605997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,1,float16,float16,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,1,float16,fp8,0,0.12732266386349997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,1,fp8,fp8,0,0.13432000080744425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,1,float16,float16,0,0.11195733149846394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,1,fp8,fp8,0,0.11161599556605022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,1,float16,float16,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,1,float16,fp8,0,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,1,fp8,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,1,float16,float16,0,0.11229333281517029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,1,fp8,fp8,0,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,1,float16,float16,0,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,1,float16,fp8,0,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,1,fp8,fp8,0,0.08020799855391185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,1,float16,float16,0,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,1,float16,fp8,0,0.06518933176994324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,1,float16,float16,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,1,float16,fp8,0,0.06554666658242543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,1,fp8,fp8,0,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,1,fp8,fp8,0,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,1,float16,fp8,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,1,float16,fp8,0,0.040965333580970764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,1,fp8,fp8,0,0.04026666780312856
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,1,float16,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,1,float16,float16,0,0.02923733244339625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,1,float16,fp8,0,0.029002666473388672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,1,fp8,fp8,0,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,1,float16,float16,0,0.022069332500298817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,1,fp8,fp8,0,0.020928000410397846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,1,float16,float16,0,0.021061333517233532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,1,fp8,fp8,0,0.02072000006834666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,1,float16,float16,0,0.9552106857299805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,1,float16,fp8,0,0.9559093316396078
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,1,fp8,fp8,0,1.0494293371836345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,1,float16,float16,0,0.9640959898630778
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,1,float16,fp8,0,0.971226692199707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,1,fp8,fp8,0,1.1166666348775227
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,1,float16,float16,0,0.9750186602274576
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,1,float16,fp8,0,0.9726293087005615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,1,fp8,fp8,0,1.0886826515197754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,1,float16,float16,0,0.5560319821039835
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,1,float16,fp8,0,0.5410186847050985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,1,float16,float16,0,0.49066134293874103
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,1,fp8,fp8,0,0.6079146862030029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,1,float16,fp8,0,0.4858880043029785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,1,fp8,fp8,0,0.5344160000483195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,1,float16,float16,0,0.49067731698354083
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,1,float16,fp8,0,0.490666667620341
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,1,fp8,fp8,0,0.5406719843546549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,1,float16,float16,0,0.49886401494344074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,1,float16,fp8,0,0.4957866668701172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,1,fp8,fp8,0,0.5451093514760336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,1,float16,float16,0,0.29132266839345294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,1,float16,fp8,0,0.28382400671641034
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,1,fp8,fp8,0,0.31624533732732135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,1,float16,float16,0,0.25601067145665485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,1,float16,fp8,0,0.25463465849558514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,1,fp8,fp8,0,0.27665066719055176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,1,float16,float16,0,0.2563413381576538
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,1,float16,fp8,0,0.25600000222524005
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,1,fp8,fp8,0,0.2776799996693929
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,1,float16,float16,0,0.2597493330637614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,1,float16,fp8,0,0.258730669816335
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,1,fp8,fp8,0,0.28279467423756915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,1,float16,float16,0,0.15650666753451029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,1,float16,fp8,0,0.1520639955997467
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,1,fp8,fp8,0,0.1704960068066915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,1,float16,float16,0,0.13858133554458618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,1,float16,fp8,0,0.13755733768145242
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,1,fp8,fp8,0,0.14865066607793173
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,1,float16,float16,0,0.13687466581662497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,1,float16,fp8,0,0.13550399740537009
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,1,fp8,fp8,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,1,float16,float16,0,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,1,float16,fp8,0,0.1378986636797587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,1,fp8,fp8,0,0.15052800377209982
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,1,float16,float16,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,1,float16,fp8,0,0.08567999800046285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,1,fp8,fp8,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,1,float16,float16,0,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,1,float16,fp8,0,0.07783466577529907
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,1,float16,float16,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,1,float16,fp8,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,1,float16,float16,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,1,fp8,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,1,float16,float16,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,1,fp8,fp8,0,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,1,fp8,fp8,0,0.048138668139775596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,1,fp8,fp8,0,0.03481066723664602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,1,float16,float16,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,1,float16,fp8,0,0.03243733445803324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,1,float16,float16,0,0.024245334168275196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,1,float16,fp8,0,0.023887999355793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,1,float16,float16,0,0.02480533222357432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,1,float16,fp8,0,0.02481599897146225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,1,fp8,fp8,0,0.018730666488409042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,1,float16,float16,0,0.01869333287080129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,1,fp8,fp8,0,0.01869333287080129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,1,float16,float16,0,0.7584426403045654
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,1,float16,fp8,0,0.7570772965749105
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,1,fp8,fp8,0,0.8529760042826334
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,1,float16,float16,0,0.7615040143330892
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,1,float16,fp8,0,0.762880007425944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,1,fp8,fp8,0,0.8570880095163981
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,1,float16,float16,0,0.7673172950744629
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,1,float16,fp8,0,0.7666347026824951
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,1,fp8,fp8,0,0.8601653575897217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,1,float16,float16,0,0.4237706661224365
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,1,float16,fp8,0,0.4177866776784261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,1,fp8,fp8,0,0.47325865427652997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,1,float16,float16,0,0.386896014213562
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,1,float16,fp8,0,0.38997332255045575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,1,fp8,fp8,0,0.4316106637318929
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,1,float16,float16,0,0.3903199831644694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,1,float16,fp8,0,0.39031465848286945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,1,fp8,fp8,0,0.438101331392924
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,1,float16,float16,0,0.3933866818745931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,1,float16,fp8,0,0.3933866818745931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,1,fp8,fp8,0,0.442197322845459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,1,float16,float16,0,0.22152533133824667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,1,float16,fp8,0,0.21844265858332315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,1,fp8,fp8,0,0.24951465924580893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,1,float16,float16,0,0.20531733830769858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,1,float16,fp8,0,0.20496533314387003
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,1,fp8,fp8,0,0.22869332631429037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,1,float16,float16,0,0.20292266209920248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,1,float16,fp8,0,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,1,fp8,fp8,0,0.22595733404159546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,1,float16,float16,0,0.2053119937578837
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,1,float16,fp8,0,0.20360533396402994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,1,fp8,fp8,0,0.22971733411153158
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,1,float16,float16,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,1,float16,fp8,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,1,fp8,fp8,0,0.13636266191800436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,1,float16,float16,0,0.10956799983978271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,1,float16,fp8,0,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,1,fp8,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,1,float16,float16,0,0.10957866907119751
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,1,float16,fp8,0,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,1,fp8,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,1,float16,float16,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,1,fp8,fp8,0,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,1,float16,fp8,0,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,1,fp8,fp8,0,0.07229333122571309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,1,float16,fp8,0,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,1,fp8,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,1,float16,fp8,0,0.06348266700903575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,1,fp8,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,1,float16,fp8,0,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,1,fp8,fp8,0,0.06929600238800049
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,1,float16,float16,0,0.03958400090535482
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,1,float16,fp8,0,0.03993066648642222
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,1,float16,float16,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,1,float16,fp8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,1,float16,float16,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,1,float16,fp8,0,0.026858667532602947
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,1,fp8,fp8,0,0.027637332677841187
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,1,float16,fp8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,1,fp8,fp8,0,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,1,float16,float16,0,0.022416000564893086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,1,float16,fp8,0,0.0176959993938605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,1,fp8,fp8,0,0.01803733284274737
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,1,float16,float16,0,0.017621333400408428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,1,float16,fp8,0,0.018863999595244724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,1,float16,float16,0,0.018858666221300762
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,1,float16,float16,0,0.6693546772003174
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,1,float16,fp8,0,0.6683306694030762
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,1,fp8,fp8,0,0.7523039976755778
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,1,float16,float16,0,0.6693546772003174
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,1,float16,fp8,0,0.6683306694030762
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,1,fp8,fp8,0,0.7509333292643229
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,1,float16,float16,0,0.6707200209299723
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,1,float16,fp8,0,0.6707146962483724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,1,float16,float16,0,0.3609600067138672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,1,fp8,fp8,0,0.7564000288645426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,1,float16,fp8,0,0.3585760196050008
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,1,fp8,fp8,0,0.40618666013081867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,1,float16,float16,0,0.34457067648569745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,1,float16,fp8,0,0.3415040175120036
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,1,fp8,fp8,0,0.38519465923309326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,1,float16,float16,0,0.3408213456471761
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,1,float16,fp8,0,0.3404800097147624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,1,fp8,fp8,0,0.3848533233006795
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,1,float16,float16,0,0.34355731805165607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,1,float16,fp8,0,0.34219201405843097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,1,fp8,fp8,0,0.38758401075998944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,1,float16,float16,0,0.18824533621470133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,1,float16,fp8,0,0.18653867642084757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,1,fp8,fp8,0,0.2146880030632019
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,1,float16,float16,0,0.1786880095799764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,1,float16,fp8,0,0.1783519983291626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,1,fp8,fp8,0,0.19899733861287436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,1,float16,float16,0,0.17868266503016153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,1,float16,fp8,0,0.1786880095799764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,1,fp8,fp8,0,0.19882667064666748
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,1,float16,float16,0,0.17920533816019693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,1,float16,fp8,0,0.18039466937383017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,1,fp8,fp8,0,0.1991680065790812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,1,float16,float16,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,1,float16,fp8,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,1,fp8,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,1,float16,fp8,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,1,fp8,fp8,0,0.10753066341082256
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,1,float16,float16,0,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,1,float16,fp8,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,1,fp8,fp8,0,0.10752532879511516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,1,float16,fp8,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,1,fp8,fp8,0,0.10854400197664897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,1,float16,float16,0,0.058037335673967995
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,1,float16,float16,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,1,float16,float16,0,0.057333335280418396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,1,fp8,fp8,0,0.061434666315714516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,1,float16,fp8,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,1,fp8,fp8,0,0.03746666759252548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,1,fp8,fp8,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,1,float16,fp8,0,0.03550933301448822
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,1,float16,fp8,0,0.02595199892918269
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,1,float16,float16,0,0.024853333830833435
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,1,float16,float16,0,0.020992000897725422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,1,float16,fp8,0,0.020661332954963047
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,1,fp8,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,1,float16,float16,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,1,float16,fp8,0,0.02083733429511388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,1,fp8,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,1,float16,float16,0,0.017978666971127193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,1,float16,float16,0,28.84881083170573
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,1,float16,fp8,0,28.911786397298176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,1,fp8,fp8,0,21.97008005777995
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,1,float16,float16,0,28.794532775878906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,1,fp8,fp8,0,21.99194081624349
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,1,float16,fp8,0,29.490516662597656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,1,float16,float16,0,28.930219014485676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,1,float16,fp8,0,29.0513916015625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,1,fp8,fp8,0,22.110549926757812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,1,float16,float16,0,28.72423044840495
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,1,float16,fp8,0,29.413546244303387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,1,fp8,fp8,0,22.101003011067707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,1,float16,float16,0,15.577259063720703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,1,float16,float16,0,14.425941467285156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,1,float16,fp8,0,15.14291763305664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,1,float16,fp8,0,15.285418192545572
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,1,fp8,fp8,0,11.151690165201822
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,1,float16,float16,0,14.87615966796875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,1,fp8,fp8,0,11.52017084757487
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,1,float16,fp8,0,14.858411153157553
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,1,fp8,fp8,0,11.136795043945312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,1,float16,float16,0,14.186495463053385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,1,float16,fp8,0,13.929813385009766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,1,fp8,fp8,0,11.188565572102865
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,1,float16,float16,0,14.298629760742188
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,1,float16,fp8,0,14.548826853434244
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,1,fp8,fp8,0,11.16637929280599
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,1,float16,float16,0,7.024645487467448
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,1,float16,float16,0,7.428613026936849
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,1,float16,fp8,0,7.008944193522136
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,1,float16,fp8,0,7.16868782043457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,1,fp8,fp8,0,5.97384516398112
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,1,fp8,fp8,0,5.799423853556315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,1,float16,float16,0,6.960816065470378
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,1,float16,fp8,0,7.158789316813151
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,1,fp8,fp8,0,5.770581563313802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,1,float16,float16,0,7.091541290283203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,1,float16,fp8,0,6.633301417032878
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,1,fp8,fp8,0,5.779290517171224
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,1,float16,float16,0,6.866949081420898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,1,float16,fp8,0,7.319722493489583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,1,fp8,fp8,0,5.79362678527832
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,1,float16,float16,0,3.645440101623535
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,1,float16,float16,0,3.609600067138672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,1,float16,fp8,0,3.6225706736246743
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,1,float16,fp8,0,3.5614773432413735
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,1,fp8,fp8,0,3.1974401473999023
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,1,fp8,fp8,0,3.1080106099446616
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,1,float16,float16,0,3.5438931783040366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,1,float16,fp8,0,3.5829865137736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,1,fp8,fp8,0,3.107669194539388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,1,float16,float16,0,3.586048126220703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,1,float16,fp8,0,3.51197878519694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,1,fp8,fp8,0,3.1110827128092446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,1,float16,float16,0,3.562837282816569
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,1,float16,fp8,0,3.583317438761393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,1,fp8,fp8,0,3.117568016052246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,1,float16,float16,0,16.962047576904297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,1,float16,fp8,0,17.219924926757812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,1,fp8,fp8,0,13.097813924153646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,1,float16,float16,0,17.079125722249348
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,1,float16,fp8,0,17.000619252522785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,1,fp8,fp8,0,13.113173166910807
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,1,float16,float16,0,16.783023834228516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,1,float16,fp8,0,16.451578776041668
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,1,fp8,fp8,0,13.134581247965494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,1,float16,float16,0,17.080490112304688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,1,float16,fp8,0,17.001983642578125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,1,fp8,fp8,0,13.169663747151693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,1,float16,float16,0,8.21182378133138
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,1,float16,float16,0,8.00273068745931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,1,float16,fp8,0,8.184842427571615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,1,float16,fp8,0,8.98969586690267
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,1,fp8,fp8,0,7.015082677205403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,1,fp8,fp8,0,6.6828053792317705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,1,float16,float16,0,7.665669123331706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,1,float16,fp8,0,8.541029612223307
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,1,fp8,fp8,0,6.68671989440918
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,1,float16,float16,0,8.311813354492188
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,1,float16,fp8,0,8.030730565388998
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,1,fp8,fp8,0,6.714368184407552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,1,float16,float16,0,8.206853230794271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,1,float16,fp8,0,8.272218704223633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,1,fp8,fp8,0,6.718805313110352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,1,float16,float16,0,4.123317400614421
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,1,float16,float16,0,3.9521280924479165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,1,float16,fp8,0,4.278613408406575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,1,float16,fp8,0,4.010832150777181
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,1,fp8,fp8,0,3.507882754007975
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,1,fp8,fp8,0,3.6601174672444663
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,1,float16,float16,0,4.057770729064941
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,1,float16,fp8,0,3.959295908610026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,1,fp8,fp8,0,3.511295954386393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,1,float16,float16,0,4.407631874084473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,1,float16,fp8,0,4.050944010416667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,1,fp8,fp8,0,3.516074816385905
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,1,float16,float16,0,4.008805274963379
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,1,float16,fp8,0,4.011520067850749
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,1,fp8,fp8,0,3.52562681833903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,1,float16,float16,0,2.1828266779581704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,1,float16,float16,0,2.241696039835612
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,1,float16,fp8,0,2.219519933064779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,1,float16,fp8,0,2.2058614095052085
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,1,fp8,fp8,0,1.9937280019124348
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,1,fp8,fp8,0,1.9283626874287922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,1,float16,float16,0,2.136741320292155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,1,float16,fp8,0,2.223445256551107
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,1,fp8,fp8,0,1.9276800155639648
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,1,float16,float16,0,2.1845332781473794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,1,float16,fp8,0,2.2272000312805176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,1,fp8,fp8,0,1.9368960062662761
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,1,float16,float16,0,2.171232064565023
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,1,float16,fp8,0,2.1790879567464194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,1,fp8,fp8,0,1.9348479906717937
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,1,float16,float16,0,12.080132802327475
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,1,float16,fp8,0,12.083199818929037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,1,fp8,fp8,0,9.498448053995768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,1,float16,float16,0,12.34347152709961
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,1,float16,fp8,0,11.873626708984375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,1,fp8,fp8,0,9.517226537068685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,1,float16,float16,0,12.244149525960287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,1,float16,fp8,0,11.682299296061197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,1,fp8,fp8,0,9.524400075276693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,1,float16,float16,0,12.221956888834635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,1,float16,fp8,0,11.725828806559244
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,1,fp8,fp8,0,9.561248143513998
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,1,float16,float16,0,5.802154541015625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,1,float16,fp8,0,5.6623789469401045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,1,float16,float16,0,5.816325505574544
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,1,float16,fp8,0,5.889210383097331
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,1,fp8,fp8,0,5.175130526224772
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,1,fp8,fp8,0,4.884138743082683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,1,float16,float16,0,5.4777170817057295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,1,float16,fp8,0,5.722288131713867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,1,fp8,fp8,0,4.914527893066406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,1,float16,float16,0,5.6777388254801435
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,1,float16,fp8,0,5.536938349405925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,1,fp8,fp8,0,4.897450764973958
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,1,float16,float16,0,5.623472213745117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,1,float16,fp8,0,5.518341064453125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,1,fp8,fp8,0,4.91486930847168
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,1,float16,float16,0,3.1077868143717446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,1,float16,float16,0,2.945712089538574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,1,float16,fp8,0,3.065685272216797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,1,float16,fp8,0,2.9248854319254556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,1,fp8,fp8,0,2.5854293505350747
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,1,float16,float16,0,3.016362508138021
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,1,fp8,fp8,0,2.705066680908203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,1,float16,fp8,0,2.9231786727905273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,1,fp8,fp8,0,2.5850879351298013
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,1,float16,float16,0,3.0117546717325845
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,1,float16,fp8,0,2.8939946492513022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,1,fp8,fp8,0,2.5847466786702475
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,1,float16,float16,0,2.9603840510050454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,1,float16,fp8,0,2.9409173329671225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,1,fp8,fp8,0,2.5909172693888345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,1,float16,float16,0,1.5812320709228516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,1,float16,float16,0,1.6749226252237956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,1,float16,fp8,0,1.7068373362223308
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,1,float16,fp8,0,1.6102399826049805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,1,fp8,fp8,0,1.4346346855163574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,1,fp8,fp8,0,1.496064027150472
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,1,float16,float16,0,1.6549545923868816
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,1,float16,fp8,0,1.5941972732543945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,1,fp8,fp8,0,1.435306708017985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,1,float16,float16,0,1.6020480791727703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,1,float16,fp8,0,1.6204800605773926
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,1,fp8,fp8,0,1.438037395477295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,1,float16,float16,0,1.610581398010254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,1,float16,fp8,0,1.638912041982015
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,1,fp8,fp8,0,1.4426453908284504
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,1,float16,float16,0,17.076405843098957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,1,fp8,fp8,0,12.879013061523438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,1,float16,fp8,0,16.22323226928711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,1,float16,float16,0,15.792298634847006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,1,float16,fp8,0,16.00170644124349
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,1,fp8,fp8,0,12.890640258789062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,1,float16,float16,0,16.837295532226562
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,1,float16,fp8,0,16.05905024210612
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,1,fp8,fp8,0,12.941487630208334
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,1,float16,float16,0,16.09216054280599
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,1,float16,fp8,0,16.255482991536457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,1,fp8,fp8,0,12.945578257242838
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,1,float16,float16,0,8.272725423177084
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,1,float16,fp8,0,7.552015940348308
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,1,float16,float16,0,8.329733530680338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,1,float16,fp8,0,8.397829055786133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,1,fp8,fp8,0,6.935728073120117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,1,fp8,fp8,0,6.509056091308594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,1,float16,float16,0,7.905973434448242
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,1,float16,fp8,0,7.781034469604492
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,1,fp8,fp8,0,6.512298583984375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,1,float16,float16,0,7.658159891764323
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,1,float16,fp8,0,7.170218785603841
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,1,fp8,fp8,0,6.550869623819987
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,1,float16,float16,0,7.6782989501953125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,1,float16,fp8,0,7.990447998046875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,1,fp8,fp8,0,6.5495039621988935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,1,float16,float16,0,4.123648007710774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,1,float16,float16,0,3.7171198527018228
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,1,float16,fp8,0,4.080298741658528
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,1,float16,fp8,0,3.8401705423990884
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,1,fp8,fp8,0,3.359402656555176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,1,fp8,fp8,0,3.5758078893025718
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,1,float16,float16,0,4.023983955383301
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,1,float16,fp8,0,3.7741225560506186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,1,fp8,fp8,0,3.3602558771769204
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,1,float16,float16,0,3.860645294189453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,1,float16,fp8,0,3.7971626917521157
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,1,fp8,fp8,0,3.3704961140950522
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,1,float16,float16,0,3.7152481079101562
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,1,float16,fp8,0,3.8838561375935874
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,1,fp8,fp8,0,3.3776639302571616
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,1,float16,float16,0,2.028538703918457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,1,float16,float16,0,2.123429298400879
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,1,float16,fp8,0,2.093386650085449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,1,float16,fp8,0,2.004650592803955
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,1,fp8,fp8,0,1.8935413360595703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,1,fp8,fp8,0,1.7908053398132324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,1,float16,float16,0,2.036400000254313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,1,float16,fp8,0,2.032298723856608
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,1,fp8,fp8,0,1.7959252993265789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,1,float16,float16,0,2.0094292958577475
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,1,float16,fp8,0,2.0445920626322427
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,1,fp8,fp8,0,1.7983147303263347
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,1,float16,float16,0,2.0326293309529624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,1,float16,fp8,0,2.079744021097819
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,1,fp8,fp8,0,1.8078667322794597
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,1,float16,float16,0,1.182037353515625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,1,float16,float16,0,1.140565315882365
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,1,float16,fp8,0,1.2161706288655598
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,1,fp8,fp8,0,1.064453363418579
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,1,float16,fp8,0,1.1651413440704346
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,1,fp8,fp8,0,1.016319990158081
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,1,float16,float16,0,1.160703976949056
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,1,float16,fp8,0,1.1473920345306396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,1,fp8,fp8,0,1.0241706371307373
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,1,float16,float16,0,1.142954667409261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,1,float16,fp8,0,1.1682133674621582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,1,fp8,fp8,0,1.020074685414632
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,1,float16,float16,0,1.1603626410166423
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,1,float16,fp8,0,1.1501226425170898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,1,fp8,fp8,0,1.0262186527252197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,1,float16,float16,0,9.468416213989258
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,1,float16,fp8,0,9.236143747965494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,1,fp8,fp8,0,7.99778683980306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,1,float16,float16,0,8.845312118530273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,1,float16,fp8,0,9.784661610921225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,1,fp8,fp8,0,8.017413457234701
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,1,float16,float16,0,10.049370447794596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,1,float16,fp8,0,9.66588274637858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,1,fp8,fp8,0,8.039594650268555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,1,float16,float16,0,9.793200174967447
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,1,float16,fp8,0,10.092367808024088
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,1,fp8,fp8,0,8.08294423421224
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,1,float16,float16,0,4.590421358744304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,1,float16,fp8,0,4.547072092692058
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,1,float16,float16,0,4.833279927571614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,1,float16,fp8,0,4.9384158452351885
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,1,fp8,fp8,0,4.396527926127116
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,1,fp8,fp8,0,4.0731306076049805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,1,float16,float16,0,4.68070920308431
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,1,float16,fp8,0,4.556288083394368
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,1,fp8,fp8,0,4.076213200887044
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,1,float16,float16,0,4.622511863708496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,1,float16,fp8,0,4.493824005126953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,1,fp8,fp8,0,4.084399859110515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,1,float16,float16,0,4.4880320231119795
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,1,float16,fp8,0,4.574037233988444
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,1,fp8,fp8,0,4.107946713765462
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,1,float16,float16,0,2.493610699971517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,1,float16,float16,0,2.345813274383545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,1,float16,fp8,0,2.5099946657816568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,1,float16,fp8,0,2.388853391011556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,1,fp8,fp8,0,2.120351950327555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,1,fp8,fp8,0,2.2860800425211587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,1,float16,float16,0,2.3190186818440757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,1,float16,fp8,0,2.3522987365722656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,1,fp8,fp8,0,2.1237866083780923
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,1,float16,float16,0,2.3744853337605796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,1,float16,fp8,0,2.325493335723877
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,1,fp8,fp8,0,2.1425493558247886
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,1,float16,float16,0,2.3652639389038086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,1,float16,fp8,0,2.4267093340555825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,1,fp8,fp8,0,2.137429396311442
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,1,float16,float16,0,1.285973310470581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,1,float16,float16,0,1.3315412998199463
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,1,float16,fp8,0,1.3550933202107747
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,1,fp8,fp8,0,1.2284639676411946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,1,float16,fp8,0,1.2573013305664062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,1,fp8,fp8,0,1.1576320330301921
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,1,float16,float16,0,1.2893866697947185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,1,float16,fp8,0,1.302016019821167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,1,fp8,fp8,0,1.152517318725586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,1,float16,float16,0,1.3057759602864583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,1,float16,fp8,0,1.3078239758809407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,1,fp8,fp8,0,1.1559253533681233
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,1,float16,float16,0,1.2733439604441326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,1,float16,fp8,0,1.280512015024821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,1,fp8,fp8,0,1.1576320330301921
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,1,float16,float16,0,0.7796053091684977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,1,float16,fp8,0,0.7772160371144613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,1,float16,float16,0,0.7314773400624593
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,1,fp8,fp8,0,0.705397367477417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,1,float16,fp8,0,0.7516160011291504
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,1,fp8,fp8,0,0.6666346788406372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,1,float16,float16,0,0.7458133697509766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,1,float16,fp8,0,0.7505866686503092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,1,fp8,fp8,0,0.6700373490651449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,1,float16,float16,0,0.740010658899943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,1,float16,fp8,0,0.7502453327178955
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,1,fp8,fp8,0,0.6714026927947998
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,1,float16,float16,0,0.7567253112792969
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,1,float16,fp8,0,0.7437653541564941
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,1,fp8,fp8,0,0.6707200209299723
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,1,float16,float16,0,9.715882619222006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,1,float16,fp8,0,10.092026392618815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,1,fp8,fp8,0,8.598698933919271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,1,float16,float16,0,9.816058476765951
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,1,fp8,fp8,0,8.642218907674154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,1,float16,fp8,0,9.801381429036459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,1,float16,float16,0,9.70854377746582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,1,float16,fp8,0,10.114224116007486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,1,fp8,fp8,0,8.647680282592773
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,1,float16,float16,0,9.75700823465983
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,1,float16,fp8,0,10.17531712849935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,1,fp8,fp8,0,8.726357142130533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,1,float16,float16,0,4.6699520746866865
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,1,float16,fp8,0,4.674394607543945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,1,float16,float16,0,5.152245203653972
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,1,float16,fp8,0,5.150543848673503
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,1,fp8,fp8,0,4.727471987406413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,1,fp8,fp8,0,4.326058705647786
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,1,float16,float16,0,4.714847882588704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,1,float16,fp8,0,4.641274770100911
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,1,fp8,fp8,0,4.317525227864583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,1,float16,float16,0,4.68940798441569
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,1,float16,fp8,0,4.67029349009196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,1,fp8,fp8,0,4.333056131998698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,1,float16,float16,0,4.739930788675944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,1,float16,fp8,0,4.790272076924642
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,1,fp8,fp8,0,4.352853457132976
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,1,float16,float16,0,2.5854293505350747
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,1,float16,float16,0,2.388480027516683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,1,float16,fp8,0,2.574848016103109
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,1,float16,fp8,0,2.3819947242736816
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,1,fp8,fp8,0,2.4444586435953775
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,1,fp8,fp8,0,2.217301368713379
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,1,float16,float16,0,2.3608266512552896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,1,float16,fp8,0,2.3840266863505044
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,1,fp8,fp8,0,2.2231040000915527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,1,float16,float16,0,2.3963306744893393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,1,float16,fp8,0,2.368000030517578
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,1,fp8,fp8,0,2.228053410847982
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,1,float16,float16,0,2.410559972127279
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,1,float16,fp8,0,2.4140799840291343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,1,fp8,fp8,0,2.2396586736043296
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,1,float16,float16,0,1.2439946333567302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,1,float16,float16,0,1.3475839296976726
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,1,float16,fp8,0,1.3742079734802246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,1,fp8,fp8,0,1.2822186946868896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,1,float16,fp8,0,1.2555946509043376
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,1,fp8,fp8,0,1.1699199676513672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,1,float16,float16,0,1.2863146464029949
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,1,float16,fp8,0,1.290069341659546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,1,fp8,fp8,0,1.1753919919331868
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,1,float16,float16,0,1.2573013305664062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,1,float16,fp8,0,1.2921173572540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,1,fp8,fp8,0,1.1801653703053792
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,1,float16,float16,0,1.2801653544108074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,1,float16,fp8,0,1.2835893630981445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,1,fp8,fp8,0,1.1892053286234539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,1,float16,float16,0,0.7591253121693929
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,1,float16,fp8,0,0.756389300028483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,1,float16,float16,0,0.7127040227254232
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,1,fp8,fp8,0,0.7038293679555258
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,1,float16,fp8,0,0.7195253372192383
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,1,fp8,fp8,0,0.649727980295817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,1,float16,float16,0,0.7113386789957682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,1,float16,fp8,0,0.7154346307118734
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,1,fp8,fp8,0,0.6521173318227133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,1,float16,float16,0,0.7082666556040446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,1,float16,fp8,0,0.7167999744415283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,1,fp8,fp8,0,0.6563839912414551
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,1,float16,float16,0,0.7215786774953207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,1,float16,fp8,0,0.7113386789957682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,1,fp8,fp8,0,0.6567253271738688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,1,float16,float16,0,0.442197322845459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,1,float16,float16,0,0.41813866297403973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,1,float16,fp8,0,0.4503893454869588
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,1,fp8,fp8,0,0.41335467497507733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,1,float16,fp8,0,0.42102400461832684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,1,fp8,fp8,0,0.38860801855723065
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,1,float16,float16,0,0.42001068592071533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,1,float16,fp8,0,0.41813333829243976
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,1,fp8,fp8,0,0.38894931475321454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,1,float16,float16,0,0.42188799381256104
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,1,float16,fp8,0,0.4254719813664754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,1,fp8,fp8,0,0.3882613182067871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,1,float16,float16,0,0.4265013138453166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,1,float16,fp8,0,0.4271786610285441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,1,fp8,fp8,0,0.3927040100097656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,1,float16,float16,0,6.063781102498372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,1,float16,fp8,0,6.021461486816406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,1,fp8,fp8,0,5.775530497233073
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,1,float16,float16,0,6.055936177571614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,1,float16,fp8,0,6.029823939005534
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,1,fp8,fp8,0,5.775871912638347
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,1,float16,float16,0,6.260559717814128
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,1,float16,fp8,0,6.1265869140625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,1,fp8,fp8,0,5.793280283610026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,1,float16,float16,0,6.079317092895508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,1,float16,fp8,0,6.237354914347331
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,1,fp8,fp8,0,5.835264205932617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,1,float16,float16,0,3.043498675028483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,1,float16,fp8,0,3.047242800394694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,1,float16,float16,0,3.2986507415771484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,1,float16,fp8,0,3.347968101501465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,1,fp8,fp8,0,3.227818806966146
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,1,fp8,fp8,0,2.9008213678995767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,1,float16,float16,0,3.0404319763183594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,1,float16,fp8,0,3.0458879470825195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,1,fp8,fp8,0,2.921642621358236
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,1,float16,float16,0,3.0960585276285806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,1,float16,fp8,0,3.045877456665039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,1,fp8,fp8,0,2.9253972371419272
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,1,float16,float16,0,3.082751909891764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,1,float16,fp8,0,3.0759251912434897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,1,fp8,fp8,0,2.938357353210449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,1,float16,float16,0,1.7034239768981934
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,1,float16,float16,0,1.5660373369852703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,1,float16,fp8,0,1.7491520245869954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,1,float16,fp8,0,1.5692853927612305
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,1,fp8,fp8,0,1.6600747108459473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,1,fp8,fp8,0,1.5052800178527832
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,1,float16,float16,0,1.5791786511739094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,1,float16,fp8,0,1.5904426574707031
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,1,fp8,fp8,0,1.510741392771403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,1,float16,float16,0,1.5798719724019368
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,1,float16,fp8,0,1.5812320709228516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,1,fp8,fp8,0,1.5138133366902669
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,1,float16,float16,0,1.6040959358215332
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,1,float16,fp8,0,1.5849812825520833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,1,fp8,fp8,0,1.5226880709330242
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,1,float16,float16,0,0.8540159861246744
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,1,float16,float16,0,0.9093120098114014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,1,float16,fp8,0,0.9388373692830404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,1,float16,fp8,0,0.8526506423950195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,1,fp8,fp8,0,0.8782506783803304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,1,fp8,fp8,0,0.8060639699300131
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,1,float16,float16,0,0.8557226657867432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,1,float16,fp8,0,0.8543573220570883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,1,fp8,fp8,0,0.8023040294647217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,1,float16,float16,0,0.8553813298543295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,1,float16,fp8,0,0.8567466735839844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,1,fp8,fp8,0,0.8084479967753092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,1,float16,float16,0,0.8516266345977783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,1,float16,fp8,0,0.8635733127593994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,1,fp8,fp8,0,0.8118613560994467
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,1,float16,float16,0,0.5125120083491007
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,1,float16,fp8,0,0.5155839920043945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,1,float16,float16,0,0.4800853331883748
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,1,fp8,fp8,0,0.48895466327667236
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,1,float16,fp8,0,0.4797439972559611
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,1,fp8,fp8,0,0.4490186770757039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,1,float16,float16,0,0.47941867510477704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,1,float16,fp8,0,0.48349865277608234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,1,fp8,fp8,0,0.4527946710586548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,1,float16,float16,0,0.47701334953308105
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,1,float16,fp8,0,0.48622934023539227
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,1,fp8,fp8,0,0.4561920166015625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,1,float16,float16,0,0.4827093283335368
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,1,float16,fp8,0,0.4814560015996297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,1,fp8,fp8,0,0.45653335253397626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,1,float16,float16,0,0.30737600723902386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,1,float16,float16,0,0.2862079938252767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,1,float16,fp8,0,0.31327466169993085
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,1,fp8,fp8,0,0.2926933368047078
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,1,float16,fp8,0,0.2879146734873454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,1,fp8,fp8,0,0.27357866366704303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,1,float16,float16,0,0.2851840058962504
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,1,float16,fp8,0,0.2848479946454366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,1,fp8,fp8,0,0.2746079961458842
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,1,float16,float16,0,0.2872320016225179
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,1,float16,fp8,0,0.28893866141637164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,1,fp8,fp8,0,0.27323732773462933
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,1,float16,float16,0,0.2903040051460266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,1,float16,fp8,0,0.2913279930750529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,1,fp8,fp8,0,0.2769920031229655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,1,float16,float16,0,7.012351989746094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,1,float16,fp8,0,7.002447764078776
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,1,fp8,fp8,0,6.917461395263672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,1,float16,float16,0,7.08027712504069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,1,float16,fp8,0,7.024645487467448
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,1,fp8,fp8,0,6.9309438069661455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,1,float16,float16,0,7.042389551798503
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,1,float16,fp8,0,7.076885223388672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,1,fp8,fp8,0,6.956031799316406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,1,float16,float16,0,7.0719146728515625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,1,float16,fp8,0,7.087968190511067
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,1,fp8,fp8,0,7.043754577636719
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,1,float16,float16,0,3.5051520665486655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,1,float16,fp8,0,3.4976425170898438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,1,float16,float16,0,3.8190027872721353
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,1,float16,fp8,0,3.8623520533243814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,1,fp8,fp8,0,3.863722801208496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,1,fp8,fp8,0,3.459413210550944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,1,float16,float16,0,3.490821202596029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,1,float16,fp8,0,3.4983253479003906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,1,fp8,fp8,0,3.462143898010254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,1,float16,float16,0,3.5222133000691733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,1,float16,fp8,0,3.5007146199544272
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,1,fp8,fp8,0,3.4723841349283853
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,1,float16,float16,0,3.522218704223633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,1,float16,fp8,0,3.540138562520345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,1,fp8,fp8,0,3.4955946604410806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,1,float16,float16,0,1.9508959452311199
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,1,float16,float16,0,1.7716906865437825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,1,float16,fp8,0,1.97597869237264
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,1,float16,fp8,0,1.7696426709493
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,1,fp8,fp8,0,1.9633493423461914
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,1,fp8,fp8,0,1.7580374081929524
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,1,float16,float16,0,1.7877333958943684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,1,float16,fp8,0,1.7744213740030925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,1,fp8,fp8,0,1.7655466397603352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,1,float16,float16,0,1.7853439648946126
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,1,float16,fp8,0,1.7785120010375977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,1,fp8,fp8,0,1.7744213740030925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,1,float16,float16,0,1.806506633758545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,1,float16,fp8,0,1.7966079711914062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,1,fp8,fp8,0,1.7897814114888508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,1,float16,float16,0,0.9309813181559244
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,1,float16,float16,0,1.0152959823608398
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,1,float16,fp8,0,1.03166929880778
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,1,fp8,fp8,0,1.0197386741638184
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,1,float16,fp8,0,0.9268799622853597
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,1,fp8,fp8,0,0.9224426746368408
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,1,float16,float16,0,0.9326933224995931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,1,float16,fp8,0,0.9292853673299154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,1,fp8,fp8,0,0.9190399646759033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,1,float16,float16,0,0.9347413380940756
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,1,float16,fp8,0,0.9371306896209717
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,1,fp8,fp8,0,0.9210879802703857
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,1,float16,float16,0,0.9425919850667318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,1,float16,fp8,0,0.9477120240529379
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,1,fp8,fp8,0,0.9320267041524252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,1,float16,float16,0,0.5413546562194824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,1,float16,fp8,0,0.5543093283971151
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,1,float16,float16,0,0.49988798300425213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,1,fp8,fp8,0,0.544426679611206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,1,float16,fp8,0,0.4978346824645996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,1,fp8,fp8,0,0.4910080035527547
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,1,float16,float16,0,0.5009066661198934
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,1,float16,fp8,0,0.504319985707601
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,1,fp8,fp8,0,0.4916906754175822
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,1,float16,float16,0,0.5032960176467896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,1,float16,fp8,0,0.5060266653696696
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,1,fp8,fp8,0,0.4974986712137858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,1,float16,float16,0,0.5050026575724283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,1,float16,fp8,0,0.5087680021921793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,1,fp8,fp8,0,0.49988265832265216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,1,float16,float16,0,0.308405339717865
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,1,float16,float16,0,0.2821173270543416
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,1,float16,fp8,0,0.3114666740099589
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,1,float16,fp8,0,0.2834773262341817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,1,fp8,fp8,0,0.30429333448410034
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,1,fp8,fp8,0,0.2783573269844055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,1,float16,float16,0,0.2797226707140605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,1,float16,fp8,0,0.28381333748499554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,1,fp8,fp8,0,0.2800640066464742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,1,float16,float16,0,0.2841599980990092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,1,float16,fp8,0,0.2834933400154114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,1,fp8,fp8,0,0.2821066578229268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,1,float16,float16,0,0.2865333358446757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,1,float16,fp8,0,0.2872320016225179
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,1,fp8,fp8,0,0.2821066578229268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,1,float16,float16,0,0.1728853384653727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,1,float16,float16,0,0.18961066007614136
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,1,float16,fp8,0,0.1930239995320638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,1,fp8,fp8,0,0.18636800845464072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,1,float16,fp8,0,0.17322667439778647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,1,fp8,fp8,0,0.17220266660054526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,1,float16,float16,0,0.1728853384653727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,1,float16,fp8,0,0.17356799046198526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,1,fp8,fp8,0,0.16913066307703653
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,1,float16,float16,0,0.17595734198888144
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,1,float16,fp8,0,0.17493333419164023
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,1,fp8,fp8,0,0.1687893271446228
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,1,float16,float16,0,0.1713493267695109
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,1,float16,fp8,0,0.17732266585032144
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,1,fp8,fp8,0,0.16913066307703653
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,1,float16,fp8,0,4.857845306396484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,1,float16,float16,0,4.883786519368489
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,1,fp8,fp8,0,4.9131520589192705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,1,float16,float16,0,4.880378723144531
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,1,float16,fp8,0,4.879018783569336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,1,fp8,fp8,0,4.928165435791016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,1,float16,float16,0,4.884144147237142
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,1,float16,fp8,0,4.89028263092041
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,1,fp8,fp8,0,4.93669859568278
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,1,float16,float16,0,4.945237477620442
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,1,float16,fp8,0,4.926805178324382
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,1,fp8,fp8,0,4.990463892618815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,1,float16,float16,0,2.4246506690979004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,1,float16,fp8,0,2.434218724568685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,1,float16,float16,0,2.6821972529093423
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,1,float16,fp8,0,2.711210568745931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,1,fp8,fp8,0,2.761727968851725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,1,fp8,fp8,0,2.4546987215677896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,1,float16,float16,0,2.4297812779744468
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,1,float16,fp8,0,2.4441173871358237
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,1,fp8,fp8,0,2.4645973841349282
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,1,float16,float16,0,2.43831459681193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,1,float16,fp8,0,2.4410400390625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,1,fp8,fp8,0,2.4833760261535645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,1,float16,float16,0,2.4546987215677896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,1,float16,fp8,0,2.462549368540446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,1,fp8,fp8,0,2.491567929585775
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,1,float16,float16,0,1.3765974044799805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,1,float16,float16,0,1.2475787003835042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,1,float16,fp8,0,1.3895732561747234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,1,float16,fp8,0,1.2513279914855957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,1,fp8,fp8,0,1.4165333112080891
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,1,fp8,fp8,0,1.2608853181203206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,1,float16,float16,0,1.2564533551534016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,1,float16,fp8,0,1.2567893664042156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,1,fp8,fp8,0,1.2651573022206624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,1,float16,float16,0,1.2537226676940918
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,1,float16,fp8,0,1.2593440214792888
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,1,fp8,fp8,0,1.2706133524576824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,1,float16,float16,0,1.256277322769165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,1,float16,fp8,0,1.2586666742960613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,1,fp8,fp8,0,1.2811946868896484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,1,float16,float16,0,0.65774933497111
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,1,float16,float16,0,0.7178293069203695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,1,float16,fp8,0,0.7290879885355631
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,1,float16,fp8,0,0.6574079990386963
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,1,fp8,fp8,0,0.7393279870351156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,1,fp8,fp8,0,0.6642346779505411
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,1,float16,float16,0,0.65774933497111
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,1,float16,fp8,0,0.6574079990386963
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,1,fp8,fp8,0,0.6611626545588175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,1,float16,float16,0,0.6611626545588175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,1,float16,fp8,0,0.6608213186264038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,1,fp8,fp8,0,0.667306661605835
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,1,float16,float16,0,0.6601386864980062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,1,float16,fp8,0,0.6662826538085938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,1,fp8,fp8,0,0.6710720062255859
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,1,float16,float16,0,0.389631986618042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,1,float16,fp8,0,0.3930506706237793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,1,float16,float16,0,0.35549867153167725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,1,float16,fp8,0,0.3582346836725871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,1,fp8,fp8,0,0.39867734909057617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,1,fp8,fp8,0,0.35891199111938477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,1,float16,float16,0,0.3572053511937459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,1,float16,fp8,0,0.359935998916626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,1,fp8,fp8,0,0.36061867078145343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,1,float16,float16,0,0.35652267932891846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,1,float16,fp8,0,0.3595946629842122
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,1,fp8,fp8,0,0.3643786509831746
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,1,float16,float16,0,0.36130134264628094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,1,float16,fp8,0,0.36095468203226727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,1,fp8,fp8,0,0.36402666568756104
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,1,float16,float16,0,0.22562666734059653
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,1,float16,float16,0,0.20189332962036133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,1,float16,fp8,0,0.22937599817911783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,1,fp8,fp8,0,0.22731733322143555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,1,float16,fp8,0,0.20291733741760254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,1,fp8,fp8,0,0.2053119937578837
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,1,float16,float16,0,0.20155733823776245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,1,float16,fp8,0,0.20242132743199667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,1,fp8,fp8,0,0.2065066695213318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,1,float16,float16,0,0.20292266209920248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,1,float16,fp8,0,0.20497065782546997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,1,fp8,fp8,0,0.20718934138615927
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,1,float16,float16,0,0.20599466562271118
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,1,float16,fp8,0,0.20838399728139242
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,1,fp8,fp8,0,0.21043733755747476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,1,float16,float16,0,0.13994666934013367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,1,float16,fp8,0,0.14250666896502176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,1,fp8,fp8,0,0.14217066764831543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,1,float16,float16,0,0.12869333227475485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,1,float16,fp8,0,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,1,fp8,fp8,0,0.1307253340880076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,1,float16,float16,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,1,float16,fp8,0,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,1,fp8,fp8,0,0.130730668703715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,1,float16,float16,0,0.130730668703715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,1,float16,fp8,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,1,fp8,fp8,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,1,float16,float16,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,1,float16,fp8,0,0.13147200147310892
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,1,fp8,fp8,0,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,1,float16,float16,0,5.538815816243489
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,1,float16,fp8,0,5.461333592732747
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,1,fp8,fp8,0,5.907616297403972
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,1,float16,float16,0,5.576704025268555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,1,float16,fp8,0,5.597871780395508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,1,fp8,fp8,0,5.96019172668457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,1,float16,float16,0,5.597525278727214
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,1,float16,fp8,0,5.60366948445638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,1,fp8,fp8,0,5.9714616139729815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,1,float16,float16,0,5.665285110473633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,1,float16,fp8,0,5.6516157786051435
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,1,fp8,fp8,0,6.4238936106363935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,1,float16,float16,0,2.7376588185628257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,1,float16,fp8,0,2.7216211954752603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,1,float16,float16,0,3.154773394266764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,1,float16,fp8,0,3.061936060587565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,1,fp8,fp8,0,3.2918132146199546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,1,fp8,fp8,0,2.8977492650349936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,1,float16,float16,0,2.764122645060221
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,1,float16,fp8,0,2.750629425048828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,1,fp8,fp8,0,2.9518346786499023
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,1,float16,float16,0,2.7733332316080728
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,1,float16,fp8,0,2.7665014266967773
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,1,fp8,fp8,0,2.9818881352742515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,1,float16,float16,0,2.8105386098225913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,1,float16,fp8,0,2.8139467239379883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,1,fp8,fp8,0,3.2114346822102866
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,1,float16,float16,0,1.558527946472168
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,1,float16,float16,0,1.3847893079121907
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,1,float16,fp8,0,1.5325867335001628
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,1,float16,fp8,0,1.3725066184997559
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,1,fp8,fp8,0,1.636522610982259
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,1,fp8,fp8,0,1.446741263071696
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,1,float16,float16,0,1.3875199953715007
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,1,float16,fp8,0,1.3946879704793294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,1,fp8,fp8,0,1.478650728861491
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,1,float16,float16,0,1.4018559455871582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,1,float16,fp8,0,1.3905919392903645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,1,fp8,fp8,0,1.493333339691162
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,1,float16,float16,0,1.4097013473510742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,1,float16,fp8,0,1.4103892644246419
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,1,fp8,fp8,0,1.598629315694173
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,1,float16,float16,0,0.7075839837392172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,1,float16,float16,0,0.7806293169657389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,1,float16,fp8,0,0.7703946431477865
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,1,fp8,fp8,0,0.8272213141123453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,1,float16,fp8,0,0.7055413722991943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,1,fp8,fp8,0,0.7342080275217692
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,1,float16,float16,0,0.7092853387196859
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,1,float16,fp8,0,0.7082666556040446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,1,fp8,fp8,0,0.7512746651967367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,1,float16,float16,0,0.7092853387196859
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,1,float16,fp8,0,0.7120160261789957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,1,fp8,fp8,0,0.7505919933319092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,1,float16,float16,0,0.7215786774953207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,1,float16,fp8,0,0.7150932947794596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,1,fp8,fp8,0,0.8012800216674805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,1,float16,float16,0,0.40413331985473633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,1,float16,fp8,0,0.39765334129333496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,1,float16,float16,0,0.3657439947128296
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,1,fp8,fp8,0,0.4230826695760091
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,1,float16,fp8,0,0.36403199036916095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,1,fp8,fp8,0,0.38041067123413086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,1,float16,float16,0,0.3671040137608846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,1,float16,fp8,0,0.3660800059636434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,1,fp8,fp8,0,0.38655467828114826
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,1,float16,float16,0,0.3687146504720052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,1,float16,fp8,0,0.3681173324584961
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,1,fp8,fp8,0,0.38416000207265216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,1,float16,float16,0,0.3725653489430745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,1,float16,fp8,0,0.3725653489430745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,1,fp8,fp8,0,0.3952639897664388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,1,float16,float16,0,0.21742933988571167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,1,float16,float16,0,0.19404800732930502
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,1,float16,fp8,0,0.21196800470352173
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,1,fp8,fp8,0,0.22561599810918173
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,1,float16,fp8,0,0.19233600298563638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,1,fp8,fp8,0,0.2005280057589213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,1,float16,float16,0,0.1950719952583313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,1,float16,fp8,0,0.19660800695419312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,1,fp8,fp8,0,0.20155733823776245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,1,float16,float16,0,0.1962666710217794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,1,float16,fp8,0,0.1955839991569519
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,1,fp8,fp8,0,0.20428800582885742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,1,float16,float16,0,0.197978675365448
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,1,float16,fp8,0,0.19712533553441366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,1,fp8,fp8,0,0.20855466524759927
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,1,float16,float16,0,0.12150933345158894
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,1,float16,float16,0,0.105813334385554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,1,float16,fp8,0,0.1184266706307729
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,1,fp8,fp8,0,0.12492799758911133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,1,float16,fp8,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,1,fp8,fp8,0,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,1,float16,float16,0,0.10616000493367513
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,1,float16,fp8,0,0.105813334385554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,1,fp8,fp8,0,0.11025599638621013
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,1,float16,float16,0,0.1065013309319814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,1,float16,fp8,0,0.1058240036169688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,1,fp8,fp8,0,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,1,float16,float16,0,0.1088800032933553
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,1,float16,fp8,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,1,fp8,fp8,0,0.11195733149846394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,1,float16,float16,0,0.06896000107129414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,1,float16,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,1,fp8,fp8,0,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,1,float16,float16,0,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,1,float16,fp8,0,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,1,fp8,fp8,0,0.06486399968465169
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,1,float16,float16,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,1,float16,fp8,0,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,1,fp8,fp8,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,1,float16,float16,0,0.06621333460013072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,1,float16,fp8,0,0.06381866832574208
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,1,fp8,fp8,0,0.0634933312733968
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,1,float16,float16,0,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,1,float16,float16,0,4.7592159907023115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,1,float16,fp8,0,4.7441972096761065
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,1,fp8,fp8,0,5.3096160888671875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,1,float16,float16,0,4.8491519292195635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,1,float16,fp8,0,4.854101181030273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,1,fp8,fp8,0,5.3567148844401045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,1,float16,float16,0,4.856831868489583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,1,float16,fp8,0,4.8984800974528
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,1,fp8,fp8,0,5.36627197265625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,1,fp8,fp8,0,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,1,float16,fp8,0,4.9380693435668945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,1,fp8,fp8,0,5.816495895385742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,1,float16,float16,0,2.7859681447347007
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,1,float16,float16,0,2.3751680056254068
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,1,float16,fp8,0,2.7373228073120117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,1,fp8,fp8,0,2.9999787012736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,1,float16,fp8,0,2.3693599700927734
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,1,fp8,fp8,0,2.6055679321289062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,1,float16,float16,0,2.4041813214619956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,1,float16,fp8,0,2.419541358947754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,1,fp8,fp8,0,2.6525012652079263
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,1,float16,float16,0,2.4178345998128257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,1,float16,fp8,0,2.4284159342447915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,1,fp8,fp8,0,2.666154702504476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,1,float16,float16,0,2.4574294090270996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,1,float16,fp8,0,2.44650665918986
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,1,fp8,fp8,0,2.9083305994669595
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,1,float16,float16,0,4.960938771565755
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,1,float16,float16,0,1.3902559280395508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,1,float16,float16,0,1.2025173505147297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,1,float16,fp8,0,1.2062719662984211
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,1,fp8,fp8,0,1.2989439964294434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,1,float16,fp8,0,1.3540693918863933
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,1,fp8,fp8,0,1.490261395772298
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,1,float16,float16,0,1.2178773085276287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,1,float16,fp8,0,1.2113920052846272
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,1,fp8,fp8,0,1.328810691833496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,1,float16,float16,0,1.221285343170166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,1,float16,fp8,0,1.2175359725952148
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,1,fp8,fp8,0,1.3281280199686687
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,1,float16,float16,0,1.229482650756836
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,1,float16,fp8,0,1.2305013338724773
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,1,fp8,fp8,0,1.461418628692627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,1,float16,float16,0,0.6143999894460043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,1,float16,float16,0,0.691210667292277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,1,float16,fp8,0,0.6806240081787109
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,1,float16,fp8,0,0.6150826613108317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,1,fp8,fp8,0,0.7502506573994955
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,1,fp8,fp8,0,0.6594560146331787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,1,float16,float16,0,0.6178239981333414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,1,float16,fp8,0,0.6198613246281942
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,1,fp8,fp8,0,0.6707253456115723
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,1,float16,float16,0,0.6191840171813965
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,1,float16,fp8,0,0.621237317721049
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,1,fp8,fp8,0,0.6724212964375814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,1,float16,float16,0,0.6304426590601603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,1,float16,fp8,0,0.6242986520131429
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,1,fp8,fp8,0,0.7174773216247559
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,1,float16,float16,0,0.3572053511937459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,1,float16,fp8,0,0.35037867228190106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,1,float16,float16,0,0.3158986568450928
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,1,fp8,fp8,0,0.3838346799214681
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,1,float16,fp8,0,0.3203413287798564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,1,fp8,fp8,0,0.3415040175120036
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,1,float16,float16,0,0.31829333305358887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,1,float16,fp8,0,0.31829333305358887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,1,fp8,fp8,0,0.34935466448465985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,1,float16,float16,0,0.3203519980112712
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,1,float16,fp8,0,0.3189760049184163
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,1,fp8,fp8,0,0.34935466448465985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,1,float16,float16,0,0.3251146674156189
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,1,float16,fp8,0,0.3247893253962199
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,1,fp8,fp8,0,0.3548106749852498
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,1,float16,float16,0,0.19216533501942953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,1,float16,float16,0,0.1693013310432434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,1,float16,fp8,0,0.18722132841746011
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,1,fp8,fp8,0,0.20428800582885742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,1,float16,fp8,0,0.16912533839543661
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,1,fp8,fp8,0,0.1790293256441752
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,1,float16,float16,0,0.17084799210230509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,1,float16,fp8,0,0.16846400499343872
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,1,fp8,fp8,0,0.18056533734003702
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,1,float16,float16,0,0.17066667477289835
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,1,float16,fp8,0,0.1718613306681315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,1,fp8,fp8,0,0.18244266510009766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,1,float16,float16,0,0.17356799046198526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,1,float16,fp8,0,0.17100799083709717
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,1,fp8,fp8,0,0.1865440011024475
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,1,float16,float16,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,1,float16,float16,0,0.09283733367919922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,1,float16,fp8,0,0.10547733306884766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,1,fp8,fp8,0,0.11366933584213257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,1,float16,fp8,0,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,1,fp8,fp8,0,0.09795733292897542
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,1,float16,float16,0,0.0918239951133728
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,1,float16,fp8,0,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,1,fp8,fp8,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,1,float16,fp8,0,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,1,float16,float16,0,0.09318400422732036
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,1,fp8,fp8,0,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,1,float16,float16,0,0.09386666615804036
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,1,float16,fp8,0,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,1,fp8,fp8,0,0.10171733299891154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,1,float16,float16,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,1,float16,float16,0,0.05530133346716563
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,1,float16,fp8,0,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,1,fp8,fp8,0,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,1,fp8,fp8,0,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,1,float16,float16,0,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,1,float16,fp8,0,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,1,fp8,fp8,0,0.05699733396371206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,1,float16,float16,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,1,float16,fp8,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,1,fp8,fp8,0,0.05699733396371206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,1,float16,float16,0,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,1,float16,fp8,0,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,1,fp8,fp8,0,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,1,float16,fp8,0,0.0365280012289683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,1,float16,float16,0,0.033786666889985405
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,1,fp8,fp8,0,0.03856533269087473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,1,float16,fp8,0,0.03481066723664602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,1,float16,float16,0,0.0341386670867602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,1,float16,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,1,float16,fp8,0,0.03448000053564707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,1,float16,float16,0,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,1,fp8,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,1,float16,fp8,0,1.7252747217814128
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,1,fp8,fp8,0,1.892858664194743
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,1,float16,float16,0,1.7355093955993652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,1,float16,float16,0,1.757354736328125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,1,fp8,fp8,0,1.9403093655904133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,1,float16,fp8,0,1.7546240488688152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,1,float16,float16,0,1.7634933789571126
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,1,float16,fp8,0,1.771008014678955
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,1,fp8,fp8,0,1.953279972076416
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,1,float16,float16,0,1.7969387372334797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,1,float16,fp8,0,1.7839733759562175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,1,fp8,fp8,0,2.192725340525309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,1,float16,float16,0,0.8806400299072266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,1,float16,fp8,0,0.8772319952646891
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,1,float16,float16,0,1.0507893562316895
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,1,float16,fp8,0,1.0426026980082195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,1,fp8,fp8,0,1.1398879686991374
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,1,fp8,fp8,0,0.9480533599853516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,1,float16,float16,0,0.8871306578318278
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,1,float16,fp8,0,0.8871200084686279
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,1,fp8,fp8,0,0.9753546714782715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,1,float16,float16,0,0.8959999879201254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,1,float16,fp8,0,0.8908800284067789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,1,fp8,fp8,0,0.9801386992136637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,1,float16,float16,0,0.9130667050679525
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,1,float16,fp8,0,0.9069226582845052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,1,fp8,fp8,0,1.0941387017567952
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,1,float16,float16,0,0.5481706857681274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,1,float16,float16,0,0.45550934473673504
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,1,float16,fp8,0,0.535210649172465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,1,float16,fp8,0,0.45449066162109375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,1,fp8,fp8,0,0.48554666837056476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,1,fp8,fp8,0,0.5737813313802084
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,1,float16,float16,0,0.45926400025685626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,1,float16,fp8,0,0.458245317141215
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,1,fp8,fp8,0,0.49544533093770343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,1,float16,float16,0,0.4613120158513387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,1,float16,fp8,0,0.4609653155008952
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,1,fp8,fp8,0,0.49988265832265216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,1,float16,float16,0,0.47018134593963623
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,1,float16,fp8,0,0.4657493432362874
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,1,fp8,fp8,0,0.5519359906514486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,1,float16,float16,0,0.24200532833735147
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,1,float16,float16,0,0.2868906656901042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,1,float16,fp8,0,0.24268800020217896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,1,float16,fp8,0,0.28143467505772907
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,1,fp8,fp8,0,0.2974720001220703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,1,fp8,fp8,0,0.2539520064989726
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,1,float16,float16,0,0.24337067206700644
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,1,float16,fp8,0,0.24200532833735147
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,1,fp8,fp8,0,0.2566773295402527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,1,float16,float16,0,0.24337067206700644
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,1,float16,fp8,0,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,1,fp8,fp8,0,0.25941334168116253
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,1,float16,float16,0,0.24883200724919638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,1,float16,fp8,0,0.2481493353843689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,1,fp8,fp8,0,0.2688000003496806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,1,float16,float16,0,0.15871999661127725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,1,float16,fp8,0,0.1520639955997467
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,1,float16,float16,0,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,1,fp8,fp8,0,0.16366400321324667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,1,float16,fp8,0,0.13140799601872763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,1,fp8,fp8,0,0.13738666971524557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,1,float16,float16,0,0.13158399860064188
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,1,float16,fp8,0,0.13141333063443503
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,1,fp8,fp8,0,0.13875733812650046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,1,float16,float16,0,0.1341546674569448
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,1,fp8,fp8,0,0.13960533340771994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,1,float16,float16,0,0.13704533378283182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,1,float16,fp8,0,0.13294933239618936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,1,float16,fp8,0,0.13294399778048197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,1,fp8,fp8,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,1,float16,float16,0,0.09352533022562663
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,1,float16,fp8,0,0.08737599849700928
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,1,float16,float16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,1,fp8,fp8,0,0.09352533022562663
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,1,float16,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,1,float16,float16,0,0.07406400144100189
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,1,float16,fp8,0,0.07337066531181335
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,1,fp8,fp8,0,0.07611200213432312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,1,float16,float16,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,1,fp8,fp8,0,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,1,float16,float16,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,1,float16,float16,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,1,float16,fp8,0,0.050853331883748375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,1,float16,fp8,0,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,1,float16,float16,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,1,fp8,fp8,0,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,1,float16,float16,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,1,float16,float16,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,1,float16,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,1,float16,float16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,1,float16,float16,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,1,float16,float16,0,0.024847999215126038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,1,float16,float16,0,0.024847999215126038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,1,float16,float16,0,0.024847999215126038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,1,float16,float16,0,0.9381546974182129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,1,float16,fp8,0,0.9306506315867106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,1,fp8,fp8,0,0.9852586587270101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,1,float16,float16,0,0.9507839679718018
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,1,float16,fp8,0,0.9521439870198568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,1,fp8,fp8,0,1.0129066308339436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,1,float16,float16,0,0.9617066383361816
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,1,float16,fp8,0,0.9524906476338705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,1,fp8,fp8,0,1.0248586336771648
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,1,float16,float16,0,0.9743359883626302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,1,float16,fp8,0,0.971946636835734
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,1,fp8,fp8,0,1.1354453563690186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,1,float16,float16,0,0.5707093477249146
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,1,float16,fp8,0,0.5556960105895996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,1,float16,float16,0,0.4787199894587199
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,1,float16,fp8,0,0.4753066698710124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,1,fp8,fp8,0,0.6000639994939169
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,1,fp8,fp8,0,0.5032960176467896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,1,float16,float16,0,0.4838399887084961
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,1,float16,fp8,0,0.4817813237508138
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,1,fp8,fp8,0,0.5142133235931396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,1,float16,float16,0,0.4893173376719157
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,1,float16,fp8,0,0.4872479836146037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,1,fp8,fp8,0,0.516266663869222
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,1,float16,float16,0,0.49817601839701336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,1,float16,fp8,0,0.4916906754175822
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,1,fp8,fp8,0,0.5631999969482422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,1,fp8,fp8,0,0.04743466774622599
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,1,float16,float16,0,0.29576534032821655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,1,float16,fp8,0,0.28995732466379803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,1,float16,float16,0,0.2501973311106364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,1,fp8,fp8,0,0.31214932600657147
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,1,float16,fp8,0,0.24951465924580893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,1,fp8,fp8,0,0.2621493339538574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,1,float16,float16,0,0.25463465849558514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,1,float16,fp8,0,0.25190399090449017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,1,fp8,fp8,0,0.26641066869099933
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,1,float16,float16,0,0.25668267409006756
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,1,float16,fp8,0,0.2525866627693176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,1,fp8,fp8,0,0.26879467566808063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,1,float16,float16,0,0.2600959936777751
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,1,float16,fp8,0,0.26077866554260254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,1,fp8,fp8,0,0.2786933382352193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,1,float16,float16,0,0.1604213317235311
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,1,float16,float16,0,0.13432000080744425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,1,float16,fp8,0,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,1,float16,fp8,0,0.15530666708946228
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,1,fp8,fp8,0,0.168778657913208
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,1,fp8,fp8,0,0.14114133516947427
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,1,float16,float16,0,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,1,float16,fp8,0,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,1,fp8,fp8,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,1,float16,float16,0,0.13635200262069702
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,1,float16,fp8,0,0.13619200388590494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,1,fp8,fp8,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,1,float16,float16,0,0.14062933127085367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,1,float16,fp8,0,0.13823999961217245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,1,fp8,fp8,0,0.14711466431617737
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,1,float16,float16,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,1,float16,fp8,0,0.08635200063387553
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,1,float16,float16,0,0.07407466570536296
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,1,fp8,fp8,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,1,float16,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,1,fp8,fp8,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,1,float16,float16,0,0.07441600163777669
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,1,float16,fp8,0,0.07441600163777669
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,1,fp8,fp8,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,1,float16,float16,0,0.07577066620190938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,1,float16,fp8,0,0.07611200213432312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,1,fp8,fp8,0,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,1,fp8,fp8,0,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,1,fp8,fp8,0,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,1,float16,float16,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,1,fp8,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,1,float16,float16,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,1,fp8,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,1,float16,float16,0,0.032069332897663116
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,1,float16,fp8,0,0.03207999964555105
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,1,fp8,fp8,0,0.03173333406448364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,1,float16,fp8,0,0.03276266654332479
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,1,float16,float16,0,0.03207999964555105
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,1,float16,float16,0,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,1,fp8,fp8,0,0.022815999885400135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,1,float16,float16,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,1,fp8,fp8,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,1,float16,float16,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,1,float16,float16,0,0.018837332725524902
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,1,float16,float16,0,0.018592000007629395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,1,fp8,fp8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,1,float16,float16,0,0.01860800012946129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,1,float16,float16,0,0.6401706536610922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,1,float16,fp8,0,0.6353919903437296
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,1,fp8,fp8,0,0.7150932947794596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,1,float16,float16,0,0.6473386685053507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,1,float16,fp8,0,0.6408533255259196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,1,fp8,fp8,0,0.724992036819458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,1,float16,float16,0,0.6446133454640707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,1,float16,fp8,0,0.641541322072347
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,1,fp8,fp8,0,0.7283999919891357
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,1,float16,fp8,0,0.6480106512705485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,1,fp8,fp8,0,0.7649280230204264
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,1,float16,float16,0,0.37085866928100586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,1,float16,fp8,0,0.3660800059636434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,1,float16,float16,0,0.32922132809956867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,1,float16,fp8,0,0.3295573393503825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,1,fp8,fp8,0,0.41334935029347736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,1,fp8,fp8,0,0.36745067437489826
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,1,float16,float16,0,0.3326293428738912
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,1,float16,fp8,0,0.3319466710090637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,1,fp8,fp8,0,0.3712000052134196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,1,float16,float16,0,0.3319466710090637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,1,float16,fp8,0,0.3312586744626363
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,1,fp8,fp8,0,0.37118931611378986
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,1,float16,float16,0,0.337066650390625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,1,float16,fp8,0,0.3356906572977702
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,1,fp8,fp8,0,0.3797333240509033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,1,float16,float16,0,0.19780266284942627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,1,float16,fp8,0,0.19548267126083374
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,1,float16,float16,0,0.17459199825922647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,1,fp8,fp8,0,0.21811199188232422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,1,float16,fp8,0,0.17527467012405396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,1,fp8,fp8,0,0.19301867485046387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,1,float16,float16,0,0.1742560068766276
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,1,float16,fp8,0,0.17442133029301962
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,1,fp8,fp8,0,0.19404800732930502
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,1,float16,float16,0,0.17561600605646768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,1,float16,fp8,0,0.1752799948056539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,1,fp8,fp8,0,0.1954186757405599
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,1,float16,float16,0,0.17868266503016153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,1,float16,fp8,0,0.176639993985494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,1,fp8,fp8,0,0.19831466674804688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,1,float16,float16,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,1,float16,fp8,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,1,fp8,fp8,0,0.11946666240692139
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,1,float16,float16,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,1,fp8,fp8,0,0.10206400354703267
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,1,float16,float16,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,1,float16,fp8,0,0.0942133367061615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,1,fp8,fp8,0,0.10273599624633789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,1,float16,float16,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,1,float16,fp8,0,0.09454400340716045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,1,fp8,fp8,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,1,float16,fp8,0,0.09624532858530681
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,1,float16,float16,0,0.09660266836484273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,1,fp8,fp8,0,0.106495996316274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,1,float16,fp8,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,1,fp8,fp8,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,1,fp8,fp8,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,1,fp8,fp8,0,0.05972266693909963
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,1,float16,float16,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,1,fp8,fp8,0,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,1,float16,float16,0,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,1,float16,fp8,0,0.03788266579310099
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,1,fp8,fp8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,1,fp8,fp8,0,0.025946666797002155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,1,fp8,fp8,0,0.0262719988822937
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,1,float16,float16,0,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,1,float16,fp8,0,0.025936000049114227
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,1,float16,float16,0,0.025610665480295818
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,1,float16,float16,0,0.01899733394384384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,1,fp8,fp8,0,0.020661332954963047
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,1,float16,float16,0,0.01886933296918869
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,1,float16,float16,0,0.01868266612291336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,1,float16,fp8,0,0.01860800012946129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,1,float16,float16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,1,float16,float16,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,1,fp8,fp8,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,1,float16,float16,0,0.5138773520787557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,1,float16,fp8,0,0.5142186482747396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,1,fp8,fp8,0,0.5806080102920532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,1,float16,float16,0,0.5159253279368082
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,1,float16,fp8,0,0.5145599842071533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,1,float16,float16,0,0.6580906709035238
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,1,fp8,fp8,0,0.585040012995402
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,1,float16,float16,0,0.5152426560719808
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,1,float16,fp8,0,0.514901320139567
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,1,fp8,fp8,0,0.5789013306299845
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,1,float16,float16,0,0.5196853478749593
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,1,float16,fp8,0,0.5200213193893433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,1,fp8,fp8,0,0.5922133525212606
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,1,float16,float16,0,0.2892799973487854
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,1,float16,fp8,0,0.2851840058962504
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,1,fp8,fp8,0,0.32204800844192505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,1,float16,float16,0,0.26743467648824054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,1,float16,fp8,0,0.26639999945958454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,1,fp8,fp8,0,0.2998613317807515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,1,float16,float16,0,0.26743467648824054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,1,float16,fp8,0,0.26812267303466797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,1,fp8,fp8,0,0.30088533957799274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,1,float16,float16,0,0.2681120038032532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,1,float16,fp8,0,0.266757329305013
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,1,fp8,fp8,0,0.3017066717147827
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,1,float16,float16,0,0.27050666014353436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,1,float16,fp8,0,0.2701706687609355
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,1,fp8,fp8,0,0.3042880098025004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,1,float16,float16,0,0.1527466674645742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,1,float16,fp8,0,0.1513813336690267
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,1,fp8,fp8,0,0.1742453376452128
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,1,float16,float16,0,0.14011733730634054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,1,float16,fp8,0,0.14165332913398743
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,1,fp8,fp8,0,0.15530666708946228
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,1,float16,float16,0,0.1418239971001943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,1,float16,fp8,0,0.14011733730634054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,1,fp8,fp8,0,0.15632533033688864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,1,float16,float16,0,0.14097066720326742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,1,float16,fp8,0,0.14113600055376688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,1,fp8,fp8,0,0.15581867098808289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,1,float16,float16,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,1,float16,fp8,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,1,fp8,fp8,0,0.16127999623616537
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,1,float16,fp8,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,1,float16,float16,0,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,1,fp8,fp8,0,0.09455466270446777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,1,float16,float16,0,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,1,float16,fp8,0,0.07951466739177704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,1,float16,fp8,0,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,1,float16,float16,0,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,1,float16,fp8,0,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,1,fp8,fp8,0,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,1,float16,float16,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,1,fp8,fp8,0,0.052560001611709595
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,1,float16,float16,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,1,float16,float16,0,0.04914666712284088
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,1,float16,fp8,0,0.04743466774622599
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,1,fp8,fp8,0,0.0529120018084844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,1,fp8,fp8,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,1,float16,float16,0,0.0317546675602595
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,1,fp8,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,1,float16,fp8,0,0.03207999964555105
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,1,fp8,fp8,0,0.033439998825391136
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,1,float16,fp8,0,0.02274666726589203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,1,fp8,fp8,0,0.02275199939807256
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,1,float16,fp8,0,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,1,float16,float16,0,0.017338667064905167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,1,float16,float16,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,1,float16,fp8,0,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,1,float16,float16,0,0.01836266616980235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,1,fp8,fp8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,1,float16,float16,0,0.01850133389234543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,1,fp8,fp8,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,1,float16,float16,0,0.45311999320983887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,1,float16,fp8,0,0.4534720182418823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,1,fp8,fp8,0,0.5101226568222046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,1,float16,float16,0,0.45482667287190753
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,1,float16,fp8,0,0.4524373213450114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,1,fp8,fp8,0,0.5094399849573771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,1,float16,float16,0,0.4517546494801839
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,1,float16,fp8,0,0.4527786572774251
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,1,fp8,fp8,0,0.5084160168965658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,1,float16,float16,0,0.45313068230946857
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,1,float16,fp8,0,0.45482667287190753
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,1,fp8,fp8,0,0.5128639936447144
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,1,float16,float16,0,0.24644800027211508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,1,float16,float16,0,0.233130673567454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,1,float16,fp8,0,0.24576000372568765
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,1,fp8,fp8,0,0.2773173252741496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,1,float16,fp8,0,0.23347733418146768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,1,fp8,fp8,0,0.2604373296101888
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,1,float16,float16,0,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,1,float16,fp8,0,0.23278399308522543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,1,fp8,fp8,0,0.2611253261566162
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,1,float16,float16,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,1,float16,fp8,0,0.23381332556406656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,1,fp8,fp8,0,0.2621440092722575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,1,float16,float16,0,0.23722134033838907
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,1,float16,fp8,0,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,1,fp8,fp8,0,0.26606933275858563
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,1,float16,float16,0,0.1322773297627767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,1,float16,float16,0,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,1,float16,fp8,0,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,1,fp8,fp8,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,1,float16,fp8,0,0.1262933313846588
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,1,fp8,fp8,0,0.13943466544151306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,1,float16,float16,0,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,1,float16,fp8,0,0.12559466560681662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,1,fp8,fp8,0,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,1,float16,float16,0,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,1,float16,fp8,0,0.12562132875124613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,1,fp8,fp8,0,0.13994666934013367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,1,float16,float16,0,0.12663466731707254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,1,float16,fp8,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,1,fp8,fp8,0,0.13944000005722046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,1,float16,float16,0,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,1,float16,fp8,0,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,1,fp8,fp8,0,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,1,float16,float16,0,0.07201600074768066
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,1,fp8,fp8,0,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,1,float16,float16,0,0.07202666501204173
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,1,float16,fp8,0,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,1,float16,fp8,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,1,float16,fp8,0,0.029701332251230877
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,1,float16,float16,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,1,float16,float16,0,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,1,float16,float16,0,0.021749332547187805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,1,float16,float16,0,0.02279466638962428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,1,float16,float16,0,0.021146667500336964
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,1,fp8,fp8,0,0.016645333419243496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,1,fp8,fp8,0,0.017680000513792038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,1,float16,float16,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,1,float16,fp8,0,0.018021332720915478
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,1,fp8,fp8,0,0.01802666609485944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,1,float16,float16,0,0.017338667064905167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,1,float16,float16,0,14.145882924397787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,1,float16,fp8,0,14.5961062113444
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,1,fp8,fp8,0,11.040426890055338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,1,float16,float16,0,15.246847788492838
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,1,float16,fp8,0,14.373728434244791
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,1,fp8,fp8,0,11.043839772542318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,1,float16,float16,0,14.792869567871094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,1,float16,fp8,0,14.686548868815104
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,1,fp8,fp8,0,11.06329600016276
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,1,float16,float16,0,14.32525380452474
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,1,float16,fp8,0,15.185413360595703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,1,fp8,fp8,0,11.211775461832682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,1,float16,float16,0,7.363424301147461
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,1,float16,fp8,0,7.277055740356445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,1,float16,float16,0,7.073797225952148
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,1,float16,fp8,0,6.7739410400390625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,1,fp8,fp8,0,5.821946461995442
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,1,fp8,fp8,0,5.620735804239909
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,1,float16,float16,0,6.728709538777669
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,1,float16,fp8,0,6.607018788655599
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,1,fp8,fp8,0,5.6186879475911455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,1,float16,float16,0,6.388394673665364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,1,float16,fp8,0,7.1941172281901045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,1,fp8,fp8,0,5.63371213277181
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,1,float16,float16,0,6.974490483601888
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,1,float16,fp8,0,6.87889035542806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,1,fp8,fp8,0,5.659472147623698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,1,float16,float16,0,3.446101188659668
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,1,float16,fp8,0,3.57478396097819
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,1,float16,float16,0,3.3938773473103843
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,1,float16,fp8,0,3.260586738586426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,1,fp8,fp8,0,3.0472532908121743
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,1,fp8,fp8,0,2.950144131978353
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,1,float16,fp8,0,3.3783467610677085
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,1,float16,float16,0,3.396261215209961
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,1,fp8,fp8,0,2.950826644897461
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,1,float16,float16,0,3.441322644551595
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,1,fp8,fp8,0,2.954922676086426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,1,float16,fp8,0,3.351381301879883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,1,float16,float16,0,3.4269866943359375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,1,float16,fp8,0,3.451903978983561
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,1,float16,float16,0,1.8546346028645833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,1,fp8,fp8,0,2.971306800842285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,1,float16,float16,0,1.8524212837219238
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,1,float16,fp8,0,1.837056001027425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,1,float16,fp8,0,1.9136853218078613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,1,fp8,fp8,0,1.6600747108459473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,1,fp8,fp8,0,1.6139945983886719
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,1,float16,float16,0,1.836714744567871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,1,float16,fp8,0,1.8396107355753581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,1,fp8,fp8,0,1.6153705914815266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,1,float16,float16,0,1.8157226244608562
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,1,float16,fp8,0,1.8522453308105469
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,1,fp8,fp8,0,1.6197973887125652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,1,float16,float16,0,1.8662400245666504
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,1,float16,fp8,0,1.8275039990743
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,1,fp8,fp8,0,1.6300373077392578
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,1,float16,float16,0,8.074421564737955
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,1,float16,fp8,0,8.335023880004883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,1,fp8,fp8,0,6.591999689737956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,1,float16,float16,0,8.167088190714518
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,1,float16,fp8,0,8.113328297932943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,1,fp8,fp8,0,6.611797332763672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,1,float16,float16,0,8.030047734578451
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,1,float16,fp8,0,8.190122604370117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,1,fp8,fp8,0,6.6261336008707685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,1,float16,float16,0,8.30839474995931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,1,float16,fp8,0,8.410122553507486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,1,fp8,fp8,0,6.670495986938477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,1,float16,float16,0,3.890000025431315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,1,float16,float16,0,4.000597318013509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,1,float16,fp8,0,4.175871849060059
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,1,float16,fp8,0,3.9901758829752603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,1,fp8,fp8,0,3.3960959116617837
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,1,fp8,fp8,0,3.553114573160807
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,1,float16,float16,0,3.9116799036661782
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,1,float16,fp8,0,3.9942827224731445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,1,fp8,fp8,0,3.3984851837158203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,1,float16,float16,0,3.860992113749186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,1,float16,fp8,0,3.981482823689779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,1,fp8,fp8,0,3.407696088155111
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,1,float16,float16,0,3.888298670450846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,1,float16,fp8,0,3.8504212697347007
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,1,float16,float16,0,2.1411840120951333
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,1,fp8,fp8,0,3.4355252583821616
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,1,float16,fp8,0,2.2050132751464844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,1,float16,float16,0,2.053808053334554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,1,float16,fp8,0,2.070016066233317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,1,fp8,fp8,0,1.8783574104309082
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,1,fp8,fp8,0,1.8037919998168945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,1,float16,float16,0,2.0520960489908853
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,1,float16,fp8,0,2.0741119384765625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,1,fp8,fp8,0,1.8058346112569172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,1,float16,float16,0,2.0872534116109214
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,1,float16,fp8,0,2.059434731801351
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,1,fp8,fp8,0,1.8211839993794758
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,1,float16,float16,0,2.038442611694336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,1,float16,fp8,0,2.0712107022603354
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,1,fp8,fp8,0,1.819818655649821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,1,float16,float16,0,1.1922773520151775
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,1,float16,fp8,0,1.1871679623921711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,1,float16,float16,0,1.1733333269755046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,1,float16,fp8,0,1.1857866446177165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,1,fp8,fp8,0,1.014954646428426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,1,fp8,fp8,0,1.0473919709523518
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,1,float16,float16,0,1.1600213050842285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,1,float16,fp8,0,1.1724800268809001
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,1,fp8,fp8,0,1.0159786542256672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,1,float16,fp8,0,1.1801599661509197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,1,fp8,fp8,0,1.0238293011983235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,1,float16,float16,0,1.1630933284759521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,1,float16,float16,0,1.1648000081380208
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,1,float16,fp8,0,1.1854506333669026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,1,fp8,fp8,0,1.0262186527252197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,1,float16,float16,0,5.335552215576172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,1,float16,fp8,0,5.498197555541992
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,1,fp8,fp8,0,4.820992151896159
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,1,float16,float16,0,5.402965545654297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,1,float16,fp8,0,5.3918717702229815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,1,fp8,fp8,0,4.804949442545573
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,1,float16,float16,0,5.536432266235352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,1,float16,fp8,0,5.455872217814128
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,1,fp8,fp8,0,4.828330675760905
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,1,float16,float16,0,5.35807991027832
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,1,float16,fp8,0,5.476010640462239
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,1,fp8,fp8,0,4.866389274597168
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,1,float16,float16,0,2.9194186528523765
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,1,float16,float16,0,2.964138666788737
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,1,float16,fp8,0,2.93887996673584
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,1,float16,fp8,0,2.8530346552530923
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,1,fp8,fp8,0,2.484224001566569
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,1,fp8,fp8,0,2.619904041290283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,1,float16,float16,0,2.7589972813924155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,1,float16,fp8,0,2.8668479919433594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,1,fp8,fp8,0,2.486954689025879
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,1,float16,float16,0,2.8388694127400718
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,1,float16,fp8,0,2.7804905573527017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,1,fp8,fp8,0,2.50982395807902
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,1,float16,float16,0,2.981210708618164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,1,float16,fp8,0,2.864975929260254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,1,fp8,fp8,0,2.515626589457194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,1,float16,float16,0,1.578506628672282
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,1,float16,fp8,0,1.6221920649210613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,1,float16,float16,0,1.5083519617716472
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,1,float16,fp8,0,1.558527946472168
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,1,fp8,fp8,0,1.3974186579386394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,1,fp8,fp8,0,1.334272066752116
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,1,float16,fp8,0,1.535322666168213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,1,float16,float16,0,1.528490702311198
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,1,fp8,fp8,0,1.3390560150146484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,1,float16,float16,0,1.543168067932129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,1,fp8,fp8,0,1.3397332827250164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,1,float16,fp8,0,1.5049386024475098
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,1,float16,float16,0,1.5162026087443035
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,1,float16,fp8,0,1.5626293818155925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,1,float16,float16,0,0.8758613268534342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,1,fp8,fp8,0,1.355082670847575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,1,float16,fp8,0,0.9062399864196777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,1,float16,fp8,0,0.8622079690297445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,1,float16,float16,0,0.8622079690297445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,1,fp8,fp8,0,0.7625493208567301
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,1,fp8,fp8,0,0.7961653073628744
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,1,float16,fp8,0,0.866645336151123
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,1,fp8,fp8,0,0.763205369313558
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,1,float16,float16,0,0.8738079865773519
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,1,float16,fp8,0,0.8584533532460531
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,1,fp8,fp8,0,0.7659520308176676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,1,float16,float16,0,0.8799146811167399
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,1,float16,fp8,0,0.8738133112589518
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,1,fp8,fp8,0,0.7700479825337728
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,1,float16,float16,0,7.706458409627278
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,1,float16,fp8,0,7.708677291870117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,1,fp8,fp8,0,6.488576253255208
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,1,float16,float16,0,7.758511861165364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,1,float16,fp8,0,7.79417610168457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,1,fp8,fp8,0,6.504789352416992
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,1,float16,float16,0,8.153258641560873
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,1,float16,fp8,0,7.879519780476888
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,1,fp8,fp8,0,6.533109029134114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,1,float16,fp8,0,7.9008534749348955
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,1,float16,float16,0,7.790426890055339
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,1,fp8,fp8,0,6.580048243204753
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,1,float16,float16,0,3.8616746266682944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,1,float16,fp8,0,3.9569066365559897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,1,fp8,fp8,0,3.50930118560791
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,1,float16,float16,0,3.657557487487793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,1,float16,fp8,0,3.7539841334025064
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,1,fp8,fp8,0,3.2887465159098306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,1,float16,float16,0,0.8792746861775717
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,1,float16,float16,0,3.7541545232137046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,1,float16,fp8,0,3.9231093724568686
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,1,fp8,fp8,0,3.2979679107666016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,1,float16,float16,0,3.7835092544555664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,1,fp8,fp8,0,3.3133227030436196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,1,float16,fp8,0,3.707050641377767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,1,float16,float16,0,3.84768009185791
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,1,float16,fp8,0,3.865429242451986
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,1,float16,float16,0,2.009082635243734
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,1,fp8,fp8,0,3.3423360188802085
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,1,float16,fp8,0,2.0519253412882485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,1,fp8,fp8,0,1.8235732714335124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,1,float16,float16,0,1.9082132975260417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,1,float16,fp8,0,1.9367252985636394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,1,fp8,fp8,0,1.7167359987894695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,1,float16,float16,0,1.9510614077250164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,1,fp8,fp8,0,1.7245920499165852
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,1,float16,fp8,0,1.9092480341593425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,1,float16,float16,0,1.9403093655904133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,1,float16,fp8,0,1.963007926940918
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,1,fp8,fp8,0,1.7269813219706218
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,1,float16,float16,0,1.9276746114095051
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,1,float16,fp8,0,1.9585706392923992
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,1,fp8,fp8,0,1.742677370707194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,1,float16,float16,0,1.1156480312347412
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,1,float16,fp8,0,1.084234635035197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,1,float16,float16,0,1.0832213560740154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,1,float16,fp8,0,1.0818506876627605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,1,fp8,fp8,0,0.9869653383890787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,1,fp8,fp8,0,0.9350826740264893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,1,float16,float16,0,1.0873173077901204
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,1,float16,fp8,0,1.0801493326822917
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,1,fp8,fp8,0,0.9388319651285807
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,1,float16,float16,0,1.068885326385498
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,1,float16,fp8,0,1.0920960108439128
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,1,fp8,fp8,0,0.947370688120524
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,1,float16,float16,0,1.0504533449808757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,1,float16,fp8,0,1.0569226741790771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,1,fp8,fp8,0,0.9514666398366293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,1,float16,float16,0,0.6398293177286783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,1,float16,fp8,0,0.6435840129852295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,1,float16,float16,0,0.6191786527633667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,1,fp8,fp8,0,0.5703680117925009
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,1,float16,fp8,0,0.6157653331756592
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,1,fp8,fp8,0,0.5461333195368449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,1,float16,float16,0,0.6188426812489828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,1,float16,fp8,0,0.6106453339258829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,1,fp8,fp8,0,0.5488640069961548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,1,float16,float16,0,0.6191786527633667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,1,float16,fp8,0,0.6205439964930216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,1,fp8,fp8,0,0.5485226710637411
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,1,float16,float16,0,0.6133759816487631
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,1,float16,fp8,0,0.6178133487701416
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,1,fp8,fp8,0,0.5533013343811035
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,1,float16,float16,0,4.500992139180501
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,1,float16,fp8,0,4.41429328918457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,1,fp8,fp8,0,4.054880142211914
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,1,float16,float16,0,4.565674781799316
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,1,float16,fp8,0,4.4914506276448565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,1,fp8,fp8,0,4.0552107493082685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,1,float16,float16,0,4.480170567830403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,1,float16,fp8,0,4.565343856811523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,1,fp8,fp8,0,4.071765263875325
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,1,float16,float16,0,4.627285321553548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,1,float16,fp8,0,4.550826708475749
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,1,fp8,fp8,0,4.117834726969401
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,1,float16,float16,0,2.316981315612793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,1,float16,float16,0,2.4690346717834473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,1,float16,fp8,0,2.4789387385050454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,1,float16,fp8,0,2.3237973848978677
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,1,fp8,fp8,0,2.2307839393615723
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,1,fp8,fp8,0,2.0657493273417153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,1,float16,float16,0,2.298879941304525
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,1,float16,fp8,0,2.3224426905314126
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,1,fp8,fp8,0,2.074277400970459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,1,float16,float16,0,2.2901760737101235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,1,fp8,fp8,0,2.082815965016683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,1,float16,float16,0,2.332159996032715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,1,float16,fp8,0,2.3125386238098145
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,1,fp8,fp8,0,2.0998826026916504
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,1,float16,float16,0,1.2784586747487385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,1,float16,fp8,0,1.3057706356048584
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,1,float16,float16,0,1.2127573490142822
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,1,fp8,fp8,0,1.170261303583781
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,1,float16,fp8,0,1.1933013598124187
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,1,fp8,fp8,0,1.0927786827087402
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,1,float16,float16,0,1.257472038269043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,1,float16,fp8,0,1.2335786819458008
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,1,fp8,fp8,0,1.097215970357259
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,1,float16,float16,0,1.2641279697418213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,1,float16,fp8,0,1.2247040271759033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,1,fp8,fp8,0,1.1016426881154378
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,1,float16,float16,0,1.2726613680521648
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,1,float16,fp8,0,1.2503039836883545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,1,fp8,fp8,0,1.1105279922485352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,1,float16,float16,0,0.7150932947794596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,1,float16,fp8,0,0.706559975941976
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,1,fp8,fp8,0,0.6452906529108683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,1,float16,float16,0,0.6884640057881674
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,1,float16,fp8,0,0.665605346361796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,1,fp8,fp8,0,0.6075733502705892
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,1,float16,float16,0,0.6908586819966634
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,1,float16,fp8,0,0.6799306869506836
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,1,fp8,fp8,0,0.6089440186818441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,1,float16,float16,0,0.694271961847941
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,1,float16,fp8,0,0.6843679745992025
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,1,fp8,fp8,0,0.6113280057907104
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,1,float16,float16,0,0.7004160086313883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,1,float16,fp8,0,0.6884693304697672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,1,fp8,fp8,0,0.6171306769053141
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,1,float16,float16,0,0.42239999771118164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,1,float16,fp8,0,0.42342400550842285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,1,float16,float16,0,0.4007253249486287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,1,fp8,fp8,0,0.3821226755777995
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,1,float16,fp8,0,0.3973120053609212
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,1,fp8,fp8,0,0.3619840145111084
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,1,float16,float16,0,0.3996959924697876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,1,float16,fp8,0,0.3990186850229899
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,1,fp8,fp8,0,0.36403199036916095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,1,float16,float16,0,0.40276801586151123
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,1,float16,fp8,0,0.4007253249486287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,1,fp8,fp8,0,0.3667626778284709
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,1,float16,float16,0,0.4072106679280599
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,1,float16,fp8,0,0.4065279960632324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,1,fp8,fp8,0,0.36948267618815106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,1,float16,float16,0,4.679535865783691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,1,float16,fp8,0,4.650154749552409
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,1,fp8,fp8,0,4.3412478764851885
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,1,float16,float16,0,4.668586730957031
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,1,float16,fp8,0,4.640250523885091
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,1,fp8,fp8,0,4.360021273295085
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,1,float16,fp8,0,4.6638078689575195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,1,float16,float16,0,4.8885758717854815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,1,fp8,fp8,0,4.377770741780599
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,1,float16,fp8,0,2.3261760075887046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,1,float16,float16,0,4.685317357381185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,1,float16,fp8,0,4.791978518168132
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,1,fp8,fp8,0,4.431189219156901
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,1,float16,float16,0,2.5465173721313477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,1,float16,float16,0,2.3980372746785483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,1,float16,fp8,0,2.3502559661865234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,1,fp8,fp8,0,2.1821440060933432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,1,float16,float16,0,2.417840003967285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,1,fp8,fp8,0,2.400773366292318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,1,float16,fp8,0,2.5506134033203125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,1,float16,fp8,0,2.3693599700927734
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,1,fp8,fp8,0,2.1951146125793457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,1,float16,fp8,0,2.351616064707438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,1,fp8,fp8,0,2.217642625172933
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,1,float16,float16,0,2.393258730570475
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,1,float16,fp8,0,2.376533349355062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,1,fp8,fp8,0,2.226016044616699
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,1,float16,float16,0,1.3107199668884277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,1,float16,fp8,0,1.3161866664886475
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,1,float16,float16,0,1.2533653577168782
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,1,fp8,fp8,0,1.2462133566538494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,1,float16,fp8,0,1.2083199818929036
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,1,fp8,fp8,0,1.13919997215271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,1,float16,float16,0,1.2303360303243
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,1,float16,fp8,0,1.2151467005411785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,1,fp8,fp8,0,1.1450026830037434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,1,float16,float16,0,1.2689066727956135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,1,float16,fp8,0,1.2482453187306721
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,1,fp8,fp8,0,1.1528533299763997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,1,float16,float16,0,1.245695988337199
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,1,float16,fp8,0,1.2363039652506511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,1,fp8,fp8,0,1.1654826800028484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,1,float16,float16,0,0.7219200134277344
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,1,float16,fp8,0,0.7215733528137207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,1,float16,float16,0,0.6761813163757324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,1,float16,fp8,0,0.6642346779505411
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,1,fp8,fp8,0,0.6683306694030762
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,1,fp8,fp8,0,0.6130239963531494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,1,float16,float16,0,0.6666239897410074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,1,float16,fp8,0,0.6748159726460775
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,1,fp8,fp8,0,0.6167786518732706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,1,float16,float16,0,0.6693546772003174
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,1,float16,fp8,0,0.6809600194295248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,1,fp8,fp8,0,0.6181546847025553
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,1,float16,float16,0,0.6751573085784912
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,1,float16,fp8,0,0.6901866594950358
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,1,fp8,fp8,0,0.6247040033340454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,1,float16,float16,0,0.4007146755854289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,1,float16,fp8,0,0.4123306671778361
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,1,float16,float16,0,0.37324798107147217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,1,float16,fp8,0,0.37939198811848956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,1,fp8,fp8,0,0.375983993212382
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,1,fp8,fp8,0,0.3500586748123169
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,1,float16,float16,0,0.3810986677805583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,1,float16,fp8,0,0.38041067123413086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,1,fp8,fp8,0,0.34969600041707355
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,1,float16,float16,0,0.3834826548894246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,1,float16,fp8,0,0.3831466833750407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,1,fp8,fp8,0,0.35140268007914227
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,1,float16,float16,0,0.38997332255045575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,1,float16,fp8,0,0.389631986618042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,1,fp8,fp8,0,0.3548106749852498
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,1,float16,fp8,0,0.2512213389078776
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,1,float16,float16,0,0.2481493353843689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,1,fp8,fp8,0,0.2307413419087728
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,1,float16,float16,0,0.23176532983779907
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,1,float16,fp8,0,0.23381332556406656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,1,fp8,fp8,0,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,1,float16,float16,0,0.23244800170262656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,1,float16,fp8,0,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,1,fp8,fp8,0,0.21333332856496176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,1,float16,float16,0,0.233130673567454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,1,float16,fp8,0,0.23550933599472046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,1,fp8,fp8,0,0.21436800559361777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,1,float16,float16,0,0.23449599742889404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,1,float16,fp8,0,0.23825067281723022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,1,fp8,fp8,0,0.21913067499796549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,1,float16,float16,0,3.0489705403645835
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,1,float16,fp8,0,3.039408047993978
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,1,fp8,fp8,0,2.908672014872233
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,1,float16,float16,0,3.053738594055176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,1,float16,fp8,0,3.0506668090820312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,1,fp8,fp8,0,2.9236907958984375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,1,float16,float16,0,2.3731199900309243
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,1,float16,float16,0,3.0796801249186196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,1,fp8,fp8,0,2.9416160583496094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,1,float16,float16,0,3.1165440877278647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,1,float16,fp8,0,3.1213226318359375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,1,fp8,fp8,0,2.983935991923014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,1,float16,float16,0,1.6841386159261067
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,1,float16,float16,0,1.5493119557698567
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,1,float16,fp8,0,1.7034239768981934
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,1,fp8,fp8,0,1.6354986826578777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,1,float16,fp8,0,1.5513599713643391
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,1,fp8,fp8,0,1.4772906303405762
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,1,float16,float16,0,1.5626239776611328
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,1,float16,fp8,0,1.5732053120930989
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,1,fp8,fp8,0,1.4817280769348145
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,1,float16,float16,0,1.5547679265340169
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,1,float16,fp8,0,1.566549301147461
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,1,fp8,fp8,0,1.4950453440348308
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,1,float16,float16,0,1.5723519325256348
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,1,float16,fp8,0,1.607850710550944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,1,fp8,fp8,0,1.5168852806091309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,1,float16,float16,0,0.8861013253529867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,1,float16,fp8,0,0.909658670425415
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,1,float16,float16,0,0.8173279762268066
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,1,fp8,fp8,0,0.8553813298543295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,1,float16,fp8,0,0.8279039859771729
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,1,fp8,fp8,0,0.7744853496551514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,1,float16,float16,0,0.8255146344502767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,1,float16,fp8,0,0.8285866578420004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,1,fp8,fp8,0,0.7782399654388428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,1,float16,float16,0,0.8381439844767252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,1,float16,fp8,0,0.8343893686930338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,1,fp8,fp8,0,0.7830186684926351
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,1,float16,float16,0,0.8468480110168457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,1,float16,fp8,0,0.8471893469492594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,1,fp8,fp8,0,0.7941173712412516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,1,float16,float16,0,0.48657600084940594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,1,float16,fp8,0,0.4927146832148234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,1,fp8,fp8,0,0.46164798736572266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,1,float16,float16,0,0.44970667362213135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,1,float16,fp8,0,0.44969598452250165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,1,fp8,fp8,0,0.4227413336435954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,1,float16,float16,0,0.4538026650746663
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,1,float16,fp8,0,0.45073068141937256
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,1,fp8,fp8,0,0.4237653414408366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,1,float16,float16,0,0.45687464872996014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,1,float16,fp8,0,0.45482667287190753
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,1,fp8,fp8,0,0.4254719813664754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,1,float16,float16,0,0.4624106486638387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,1,float16,fp8,0,0.4626773198445638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,1,fp8,fp8,0,0.430245320002238
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,1,float16,float16,0,0.279039998849233
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,1,float16,fp8,0,0.283135990301768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,1,float16,float16,0,0.25600000222524005
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,1,float16,fp8,0,0.25566399097442627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,1,fp8,fp8,0,0.2653866608937581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,1,fp8,fp8,0,0.24166399240493774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,1,float16,float16,0,0.25600000222524005
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,1,float16,fp8,0,0.25497599442799884
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,1,fp8,fp8,0,0.24370666344960532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,1,float16,float16,0,0.25941334168116253
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,1,float16,fp8,0,0.2583893338839213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,1,fp8,fp8,0,0.24712532758712769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,1,float16,float16,0,0.2624853253364563
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,1,float16,fp8,0,0.2616213361422221
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,1,float16,float16,0,0.17083734273910522
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,1,float16,fp8,0,3.067045211791992
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,1,float16,fp8,0,0.1718453367551168
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,1,fp8,fp8,0,0.24985599517822266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,1,float16,float16,0,0.1628159979979197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,1,float16,fp8,0,0.1609386702378591
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,1,fp8,fp8,0,0.15479466319084167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,1,fp8,fp8,0,0.16588800152142844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,1,float16,float16,0,0.1628159979979197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,1,float16,fp8,0,0.16025599837303162
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,1,fp8,fp8,0,0.15496533115704855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,1,float16,float16,0,0.15923733512560526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,1,float16,fp8,0,0.16435199975967407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,1,fp8,fp8,0,0.15377066532770792
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,1,float16,float16,0,0.1621226668357849
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,1,float16,fp8,0,0.1616213321685791
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,1,fp8,fp8,0,0.15479466319084167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,1,float16,float16,0,3.5406506856282554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,1,float16,fp8,0,3.5404799779256186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,1,fp8,fp8,0,3.4880854288736978
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,1,float16,float16,0,3.5510667165120444
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,1,float16,fp8,0,3.5452585220336914
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,1,fp8,fp8,0,3.5085652669270835
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,1,float16,float16,0,3.5839999516805015
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,1,float16,fp8,0,3.576149304707845
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,1,fp8,fp8,0,3.530069351196289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,1,float16,float16,0,3.6106239954630532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,1,float16,fp8,0,3.6420265833536782
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,1,fp8,fp8,0,3.597653388977051
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,1,float16,float16,0,1.7774933179219563
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,1,float16,float16,0,1.9300692876180012
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,1,float16,fp8,0,1.7699839274088542
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,1,float16,fp8,0,1.9549867312113445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,1,fp8,fp8,0,1.9539626439412434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,1,fp8,fp8,0,1.748480002085368
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,1,float16,float16,0,1.7703146934509277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,1,float16,fp8,0,1.7815893491109211
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,1,fp8,fp8,0,1.7703253428141277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,1,float16,float16,0,1.7819306055704753
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,1,float16,fp8,0,1.8030932744344075
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,1,fp8,fp8,0,1.7679306666056316
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,1,float16,float16,0,1.8225439389546711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,1,float16,fp8,0,1.8095787366231282
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,1,fp8,fp8,0,1.8024160067240398
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,1,float16,float16,0,1.0026666323343914
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,1,float16,float16,0,0.915450652440389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,1,float16,fp8,0,1.0115306377410889
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,1,float16,fp8,0,0.9164799849192301
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,1,fp8,fp8,0,1.0019893646240234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,1,fp8,fp8,0,0.9041919708251953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,1,float16,float16,0,0.9231359958648682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,1,float16,fp8,0,0.9186986287434896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,1,fp8,fp8,0,0.9028266270955404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,1,float16,float16,0,0.9238133430480957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,1,float16,fp8,0,0.9262080192565918
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,1,fp8,fp8,0,0.9086293379465739
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,1,float16,float16,0,0.9309866428375244
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,1,float16,fp8,0,0.9330346584320068
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,1,fp8,fp8,0,0.9265493551890055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,1,float16,float16,0,0.529749313990275
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,1,float16,float16,0,0.48349865277608234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,1,float16,fp8,0,0.48520533243815106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,1,float16,fp8,0,0.5331626733144125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,1,fp8,fp8,0,0.4773546854654948
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,1,fp8,fp8,0,0.5283840099970499
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,1,float16,float16,0,0.48554666837056476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,1,float16,fp8,0,0.48418132464090985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,1,fp8,fp8,0,0.4776959816614787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,1,float16,float16,0,0.4903413454691569
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,1,float16,fp8,0,0.49134933948516846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,1,fp8,fp8,0,0.47940266132354736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,1,float16,float16,0,0.4957866668701172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,1,float16,fp8,0,0.49988265832265216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,1,fp8,fp8,0,0.4889599879582723
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,1,float16,float16,0,0.2882560094197591
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,1,float16,float16,0,0.26368000109990436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,1,float16,fp8,0,0.295413335164388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,1,fp8,fp8,0,0.28859732548395794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,1,float16,fp8,0,0.26316799720128375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,1,fp8,fp8,0,0.2621440092722575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,1,float16,float16,0,0.2657279968261719
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,1,float16,fp8,0,0.2674186627070109
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,1,fp8,fp8,0,0.2624800006548564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,1,float16,float16,0,0.2701653242111206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,1,float16,fp8,0,0.2688000003496806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,1,fp8,fp8,0,0.2657279968261719
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,1,float16,float16,0,0.26948267221450806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,1,float16,fp8,0,0.274944007396698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,1,fp8,fp8,0,0.2677759925524394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,1,float16,float16,0,0.16947199900945029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,1,float16,fp8,0,0.17135467131932577
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,1,float16,float16,0,0.150026669104894
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,1,float16,fp8,0,0.15342400471369425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,1,fp8,fp8,0,0.16776533921559653
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,1,fp8,fp8,0,0.14591999848683676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,1,float16,float16,0,0.15120533108711243
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,1,float16,fp8,0,0.1527466674645742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,1,fp8,fp8,0,0.1469386617342631
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,1,float16,float16,0,0.15052800377209982
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,1,float16,fp8,0,0.15344533324241638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,1,fp8,fp8,0,0.1518933375676473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,1,float16,float16,0,0.15530666708946228
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,1,float16,fp8,0,0.1546239952246348
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,1,fp8,fp8,0,0.155130664507548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,1,float16,float16,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,1,float16,fp8,0,0.105813334385554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,1,fp8,fp8,0,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,1,float16,float16,0,0.10001066327095032
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,1,float16,fp8,0,0.10341866811116536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,1,fp8,fp8,0,0.10035199920336406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,1,float16,float16,0,0.10001066327095032
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,1,float16,fp8,0,0.1013759970664978
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,1,fp8,fp8,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,1,float16,float16,0,0.10171733299891154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,1,float16,fp8,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,1,fp8,fp8,0,0.10035199920336406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,1,float16,float16,0,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,1,float16,fp8,0,0.10172800223032634
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,1,fp8,fp8,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,1,float16,float16,0,2.4782506624857583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,1,float16,fp8,0,2.469370683034261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,1,fp8,fp8,0,2.4802986780802407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,1,float16,float16,0,2.4731252988179526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,1,float16,fp8,0,2.4847307205200195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,1,fp8,fp8,0,2.4982186953226724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,1,float16,float16,0,2.4814933141072593
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,1,float16,fp8,0,2.4847413698832193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,1,fp8,fp8,0,2.520064036051432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,1,float16,float16,0,2.5191946029663086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,1,float16,fp8,0,2.5267200469970703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,1,fp8,fp8,0,2.5509492556254068
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,1,float16,float16,0,1.2438186804453533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,1,float16,float16,0,1.3666987419128418
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,1,float16,fp8,0,1.2445013523101807
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,1,float16,fp8,0,1.386837323506673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,1,fp8,fp8,0,1.409023920694987
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,1,fp8,fp8,0,1.2567893664042156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,1,float16,float16,0,1.2448426882425945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,1,float16,fp8,0,1.252010663350423
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,1,fp8,fp8,0,1.26310396194458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,1,float16,float16,0,1.2596960067749023
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,1,float16,fp8,0,1.2554240226745605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,1,fp8,fp8,0,1.2750506401062012
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,1,float16,float16,0,1.275391976038615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,1,float16,fp8,0,1.2825600306193035
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,1,fp8,fp8,0,1.2890453338623047
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,1,float16,float16,0,0.7096266746520996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,1,float16,float16,0,0.644266684850057
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,1,float16,fp8,0,0.7167999744415283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,1,float16,fp8,0,0.6463146607081095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,1,fp8,fp8,0,0.7256746292114258
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,1,fp8,fp8,0,0.6487040122350057
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,1,float16,float16,0,0.6459733247756958
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,1,float16,fp8,0,0.6521173318227133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,1,fp8,fp8,0,0.6528000036875407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,1,float16,float16,0,0.648698647816976
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,1,float16,fp8,0,0.6548480192820231
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,1,fp8,fp8,0,0.6587733427683512
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,1,float16,float16,0,0.6625279982884725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,1,float16,fp8,0,0.6611626545588175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,1,fp8,fp8,0,0.6666239897410074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,1,float16,float16,0,0.377679983774821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,1,float16,float16,0,0.3449173370997111
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,1,float16,fp8,0,0.3449173370997111
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,1,fp8,fp8,0,0.3476479848225911
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,1,fp8,fp8,0,0.38655467828114826
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,1,float16,float16,0,0.3432106574376424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,1,float16,fp8,0,0.34594134489695233
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,1,fp8,fp8,0,0.3473066488901774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,1,float16,float16,0,0.34935466448465985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,1,float16,fp8,0,0.350383996963501
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,1,fp8,fp8,0,0.3510560194651286
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,1,float16,float16,0,0.3544906775156657
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,1,float16,fp8,0,0.3548159996668498
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,1,fp8,fp8,0,0.35549867153167725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,1,float16,float16,0,0.20958399772644043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,1,float16,fp8,0,0.21401600042978922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,1,fp8,fp8,0,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,1,float16,float16,0,0.18978132804234824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,1,float16,fp8,0,0.1879040002822876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,1,fp8,fp8,0,0.19165333112080893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,1,float16,float16,0,0.18926932414372763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,1,float16,fp8,0,0.18926932414372763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,1,fp8,fp8,0,0.19268266359965006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,1,float16,float16,0,0.19370667139689127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,1,float16,fp8,0,0.1919999917348226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,1,fp8,fp8,0,0.1960960030555725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,1,float16,float16,0,0.19780266284942627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,1,float16,fp8,0,0.1960960030555725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,1,fp8,fp8,0,0.20002132654190063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,1,float16,float16,0,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,1,float16,fp8,0,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,1,fp8,fp8,0,0.1283466617266337
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,1,float16,fp8,0,0.11673600474993388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,1,fp8,fp8,0,0.11229866743087769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,1,float16,float16,0,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,1,float16,fp8,0,0.11639466881752014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,1,fp8,fp8,0,0.11229333281517029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,1,float16,float16,0,0.11433600385983785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,1,fp8,fp8,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,1,float16,fp8,0,0.11674132943153381
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,1,float16,float16,0,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,1,float16,fp8,0,0.11639466881752014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,1,fp8,fp8,0,0.1160426636536916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,1,float16,float16,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,1,float16,fp8,0,0.08124266564846039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,1,fp8,fp8,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,1,fp8,fp8,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,1,float16,float16,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,1,float16,float16,0,0.07919999957084656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,1,float16,fp8,0,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,1,fp8,fp8,0,0.07680533329645793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,1,float16,float16,0,0.07918400069077809
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,1,fp8,fp8,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,1,float16,float16,0,2.777770678202311
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,1,float16,fp8,0,2.766511917114258
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,1,fp8,fp8,0,2.9257386525472007
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,1,float16,float16,0,2.80132261912028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,1,float16,fp8,0,2.788010597229004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,1,fp8,fp8,0,3.0684213638305664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,1,float16,float16,0,2.9385385513305664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,1,float16,fp8,0,2.9353014628092446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,1,fp8,fp8,0,3.358544031778971
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,1,float16,float16,0,2.9757439295450845
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,1,float16,fp8,0,2.943488121032715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,1,fp8,fp8,0,3.3599093755086265
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,1,float16,fp8,0,0.3848533233006795
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,1,float16,float16,0,1.4069760640462239
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,1,float16,float16,0,1.5894187291463215
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,1,float16,fp8,0,1.5428266525268555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,1,fp8,fp8,0,1.6559786796569824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,1,float16,fp8,0,1.3957173029581706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,1,fp8,fp8,0,1.478314717610677
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,1,float16,fp8,0,1.4069760640462239
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,1,fp8,fp8,0,1.5121067365010579
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,1,float16,float16,0,1.4704640706380208
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,1,float16,fp8,0,1.4377013842264812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,1,fp8,fp8,0,1.6766293843587239
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,1,float16,float16,0,1.4656799634297688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,1,float16,fp8,0,1.4404266675313313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,1,fp8,fp8,0,1.6747573216756184
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,1,float16,float16,0,0.7889973322550455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,1,float16,fp8,0,0.7727786699930826
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,1,fp8,fp8,0,0.8330240249633789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,1,float16,float16,0,0.7202186584472656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,1,float16,fp8,0,0.7109920183817545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,1,fp8,fp8,0,0.7478613058725992
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,1,float16,float16,0,0.7249866326649984
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,1,float16,fp8,0,0.7147520383199056
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,1,fp8,fp8,0,0.7625386714935303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,1,float16,float16,0,0.741370677947998
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,1,float16,fp8,0,0.733184019724528
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,1,fp8,fp8,0,0.8374559879302979
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,1,float16,float16,0,0.7447893619537354
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,1,float16,fp8,0,0.7325013478597006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,1,fp8,fp8,0,0.8306346734364828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,1,float16,float16,0,0.4072106679280599
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,1,float16,fp8,0,0.39800000190734863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,1,float16,float16,0,0.3729066848754883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,1,float16,fp8,0,0.36744534969329834
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,1,fp8,fp8,0,0.4264959891637166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,1,fp8,fp8,0,0.3892800013224284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,1,float16,float16,0,0.3715413411458333
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,1,float16,fp8,0,0.37050668398539227
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,1,fp8,fp8,0,0.3959413369496663
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,1,float16,float16,0,0.38042668501536053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,1,float16,fp8,0,0.37939198811848956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,1,fp8,fp8,0,0.4210346539815267
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,1,float16,fp8,0,0.3797333240509033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,1,float16,float16,0,0.38178133964538574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,1,fp8,fp8,0,0.4230773448944092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,1,float16,float16,0,0.21538132429122925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,1,float16,fp8,0,0.21197332938512167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,1,fp8,fp8,0,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,1,float16,float16,0,0.19541333119074503
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,1,float16,fp8,0,0.19473065932591757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,1,fp8,fp8,0,0.20514132579167685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,1,float16,float16,0,0.1962666710217794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,1,float16,fp8,0,0.1962666710217794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,1,fp8,fp8,0,0.20770132541656494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,1,float16,float16,0,0.2034346659978231
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,1,float16,fp8,0,0.2000160018603007
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,1,fp8,fp8,0,0.21880000829696655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,1,float16,float16,0,0.20411733786265054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,1,float16,fp8,0,0.20292266209920248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,1,fp8,fp8,0,0.2198186715443929
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,1,float16,float16,0,0.1204800009727478
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,1,float16,fp8,0,0.1204906702041626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,1,fp8,fp8,0,0.12663466731707254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,1,float16,float16,0,0.10820266604423523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,1,float16,fp8,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,1,fp8,fp8,0,0.11026133100191753
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,1,float16,float16,0,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,1,float16,fp8,0,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,1,fp8,fp8,0,0.11161599556605022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,1,float16,float16,0,0.11059733231862386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,1,float16,fp8,0,0.10990400115648906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,1,fp8,fp8,0,0.11980799833933513
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,1,float16,float16,0,0.11196266611417134
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,1,float16,fp8,0,0.11230400204658508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,1,fp8,fp8,0,0.12014399965604146
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,1,float16,float16,0,0.06791999936103821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,1,float16,fp8,0,0.06689066688219707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,1,float16,float16,0,0.0645066648721695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,1,fp8,fp8,0,0.07203199962774913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,1,fp8,fp8,0,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,1,float16,float16,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,1,float16,fp8,0,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,1,float16,float16,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,1,float16,fp8,0,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,1,fp8,fp8,0,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,1,float16,float16,0,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,1,fp8,fp8,0,0.06790933509667714
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,1,float16,float16,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,1,float16,fp8,0,0.04267199834187826
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,1,float16,float16,0,0.03959999978542328
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,1,fp8,fp8,0,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,1,float16,fp8,0,0.04027733455101649
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,1,fp8,fp8,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,1,fp8,fp8,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,1,float16,fp8,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,1,fp8,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,1,float16,float16,0,1.4134559631347656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,1,float16,fp8,0,2.418522675832113
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,1,float16,float16,0,2.429445266723633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,1,fp8,fp8,0,2.624517281850179
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,1,float16,float16,0,2.4639199574788413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,1,float16,fp8,0,2.445141315460205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,1,fp8,fp8,0,2.768218676249186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,1,float16,float16,0,2.577909310658773
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,1,float16,fp8,0,2.57041072845459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,1,fp8,fp8,0,3.080709457397461
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,1,float16,float16,0,2.6021547317504883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,1,float16,fp8,0,2.5676746368408203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,1,fp8,fp8,0,3.076266606648763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,1,float16,float16,0,1.4117546081542969
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,1,float16,float16,0,1.2301653226216633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,1,float16,fp8,0,1.373184045155843
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,1,fp8,fp8,0,1.5059572855631511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,1,float16,fp8,0,1.2202719847361247
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,1,fp8,fp8,0,1.3233439922332764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,1,float16,float16,0,1.2363093694051106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,1,float16,fp8,0,1.229482650756836
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,1,fp8,fp8,0,1.3725013732910156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,1,float16,float16,0,1.282901366551717
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,1,float16,fp8,0,1.2658452987670898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,1,fp8,fp8,0,1.5291733741760254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,1,float16,float16,0,1.291434685389201
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,1,float16,fp8,0,1.275391976038615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,1,fp8,fp8,0,1.526096026102702
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,1,float16,float16,0,0.6987093289693197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,1,float16,float16,0,0.6266880035400391
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,1,float16,fp8,0,0.6901760101318359
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,1,float16,fp8,0,0.6202079852422079
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,1,fp8,fp8,0,0.6717387040456136
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,1,fp8,fp8,0,0.7591306368509928
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,1,float16,float16,0,0.6337013244628906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,1,float16,fp8,0,0.6256639957427979
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,1,fp8,fp8,0,0.6877866586049398
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,1,float16,float16,0,0.648698647816976
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,1,float16,fp8,0,0.6456319888432821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,1,fp8,fp8,0,0.7618559996287028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,1,float16,float16,0,0.649727980295817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,1,float16,fp8,0,0.6435786485671997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,1,fp8,fp8,0,0.7639040152231852
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,1,float16,float16,0,0.3595946629842122
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,1,float16,float16,0,0.32579733928044635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,1,float16,fp8,0,0.3213653365770976
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,1,float16,fp8,0,0.35276798407236737
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,1,fp8,fp8,0,0.38758401075998944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,1,fp8,fp8,0,0.34867199261983234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,1,float16,float16,0,0.32613333066304523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,1,float16,fp8,0,0.32307199637095135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,1,fp8,fp8,0,0.3551573355992635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,1,float16,float16,0,0.33843199412027997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,1,float16,fp8,0,0.33399466673533124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,1,fp8,fp8,0,0.377344012260437
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,1,float16,float16,0,0.337066650390625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,1,float16,fp8,0,0.33536001046498615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,1,float16,float16,0,0.1920106609662374
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,1,fp8,fp8,0,0.37461864948272705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,1,float16,fp8,0,0.1872160037358602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,1,float16,float16,0,0.17100799083709717
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,1,fp8,fp8,0,0.20497065782546997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,1,float16,fp8,0,0.1704960068066915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,1,fp8,fp8,0,0.18312533696492514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,1,float16,float16,0,0.17254400253295898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,1,float16,fp8,0,0.17407999436060587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,1,fp8,fp8,0,0.1848319967587789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,1,float16,float16,0,0.17800533771514893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,1,float16,fp8,0,0.17698132991790771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,1,fp8,fp8,0,0.19746132691701254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,1,float16,float16,0,0.1795360048611959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,1,float16,fp8,0,0.17868266503016153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,1,fp8,fp8,0,0.1954186757405599
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,1,float16,float16,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,1,float16,fp8,0,0.1065120001633962
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,1,fp8,fp8,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,1,float16,float16,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,1,float16,fp8,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,1,fp8,fp8,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,1,float16,float16,0,0.0942080020904541
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,1,float16,fp8,0,0.09454400340716045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,1,fp8,fp8,0,0.10035199920336406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,1,float16,float16,0,0.09761599699656169
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,1,float16,fp8,0,0.09694400429725647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,1,fp8,fp8,0,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,1,float16,float16,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,1,float16,fp8,0,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,1,fp8,fp8,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,1,float16,float16,0,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,1,float16,fp8,0,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,1,float16,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,1,fp8,fp8,0,0.06348266700903575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,1,float16,float16,0,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,1,float16,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,1,fp8,fp8,0,0.055973331133524575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,1,float16,float16,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,1,float16,fp8,0,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,1,fp8,fp8,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,1,float16,float16,0,0.05598400036493937
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,1,fp8,fp8,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,1,float16,float16,0,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,1,float16,fp8,0,0.036517334481080375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,1,float16,float16,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,1,float16,fp8,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,1,float16,fp8,0,0.0365226666132609
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,1,fp8,fp8,0,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,1,float16,float16,0,0.03516799956560135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,1,fp8,fp8,0,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,1,float16,float16,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,1,float16,float16,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,1,float16,float16,0,0.027104000250498455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,1,float16,float16,0,0.027984000742435455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,1,float16,fp8,0,0.028330666323502857
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,1,float16,float16,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,1,float16,fp8,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,1,float16,float16,0,0.908624013264974
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,1,float16,fp8,0,0.8963413238525391
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,1,fp8,fp8,0,0.970240036646525
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,1,float16,float16,0,0.9168213208516439
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,1,fp8,fp8,0,1.0071039994557698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,1,float16,fp8,0,0.912384033203125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,1,float16,float16,0,0.9644373257954916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,1,float16,fp8,0,0.9521493117014567
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,1,fp8,fp8,0,1.174015998840332
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,1,float16,float16,0,0.9838879903157552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,1,float16,fp8,0,0.9487360318501791
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,1,fp8,fp8,0,1.1813600063323975
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,1,float16,float16,0,0.4664320151011149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,1,float16,float16,0,0.5413546562194824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,1,float16,fp8,0,0.4623359839121501
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,1,float16,fp8,0,0.5331626733144125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,1,fp8,fp8,0,0.5812906821568807
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,1,fp8,fp8,0,0.4957866668701172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,1,float16,float16,0,0.4729280074437459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,1,float16,fp8,0,0.4715520143508911
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,1,fp8,fp8,0,0.5101173321406046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,1,float16,float16,0,0.493397315343221
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,1,float16,fp8,0,0.48760000864664715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,1,fp8,fp8,0,0.5881173213322958
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,1,float16,float16,0,0.4913546641667684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,1,float16,fp8,0,0.4872479836146037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,1,fp8,fp8,0,0.5843626658121744
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,1,float16,float16,0,0.28893866141637164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,1,float16,fp8,0,0.2807413339614868
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,1,float16,float16,0,0.2461013396581014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,1,float16,fp8,0,0.24678399165471396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,1,fp8,fp8,0,0.3002026677131653
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,1,fp8,fp8,0,0.2587253252665202
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,1,float16,float16,0,0.24883200724919638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,1,float16,fp8,0,0.24951465924580893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,1,fp8,fp8,0,0.2664053241411845
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,1,float16,float16,0,0.25975465774536133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,1,float16,fp8,0,0.25702399015426636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,1,fp8,fp8,0,0.2937120000521342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,1,float16,float16,0,0.26179732879002887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,1,float16,fp8,0,0.2590346733729045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,1,fp8,fp8,0,0.2892853418986003
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,1,float16,float16,0,0.15718400478363037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,1,float16,float16,0,0.13260799646377563
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,1,float16,fp8,0,0.13329066832860312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,1,float16,fp8,0,0.1520639955997467
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,1,fp8,fp8,0,0.1604266663392385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,1,fp8,fp8,0,0.14011733730634054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,1,float16,float16,0,0.13499733805656433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,1,float16,fp8,0,0.13327999909718832
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,1,fp8,fp8,0,0.14267733693122864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,1,float16,float16,0,0.13942933082580566
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,1,float16,fp8,0,0.13942933082580566
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,1,fp8,fp8,0,0.1544533371925354
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,1,float16,float16,0,0.14455466469128928
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,1,float16,fp8,0,0.1397706667582194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,1,fp8,fp8,0,0.1565013329188029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,1,float16,float16,0,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,1,float16,fp8,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,1,float16,float16,0,0.07714666426181793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,1,float16,fp8,0,0.07576533158620198
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,1,fp8,fp8,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,1,float16,float16,0,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,1,float16,fp8,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,1,fp8,fp8,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,1,float16,float16,0,0.07885333398977916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,1,float16,fp8,0,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,1,fp8,fp8,0,0.08567999800046285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,1,float16,float16,0,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,1,float16,fp8,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,1,float16,float16,0,0.05119466781616211
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,1,float16,float16,0,0.04709866642951965
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,1,fp8,fp8,0,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,1,fp8,fp8,0,0.0529120018084844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,1,fp8,fp8,0,0.04608533283074697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,1,float16,float16,0,0.048138668139775596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,1,float16,fp8,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,1,float16,fp8,0,0.031727999448776245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,1,float16,fp8,0,0.024901332954565685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,1,fp8,fp8,0,0.021498667697111767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,1,float16,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,1,fp8,fp8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,1,float16,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,1,float16,float16,0,0.4920320113499959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,1,float16,fp8,0,0.48554666837056476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,1,fp8,fp8,0,0.5111466646194458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,1,float16,float16,0,0.5022720098495483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,1,float16,fp8,0,0.4944213231404622
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,1,fp8,fp8,0,0.532480001449585
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,1,float16,float16,0,0.5227520068486532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,1,float16,fp8,0,0.516266663869222
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,1,fp8,fp8,0,0.596997340520223
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,1,float16,float16,0,0.5241173505783081
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,1,float16,fp8,0,0.5179680188496908
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,1,fp8,fp8,0,0.6010773181915283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,1,float16,float16,0,0.29713066418965656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,1,float16,fp8,0,0.28995732466379803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,1,float16,float16,0,0.25599465767542523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,1,fp8,fp8,0,0.3118079900741577
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,1,float16,fp8,0,0.25361067056655884
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,1,fp8,fp8,0,0.26606933275858563
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,1,float16,float16,0,0.2611146569252014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,1,float16,fp8,0,0.2573653260866801
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,1,fp8,fp8,0,0.2763200004895528
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,1,float16,float16,0,0.272213339805603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,1,float16,fp8,0,0.26948267221450806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,1,fp8,fp8,0,0.29678932825724286
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,1,float16,float16,0,0.2728959918022156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,1,float16,fp8,0,0.2677759925524394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,1,fp8,fp8,0,0.29678932825724286
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,1,float16,float16,0,0.16060266892115274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,1,float16,fp8,0,0.15838932991027832
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,1,float16,float16,0,0.13777066270510355
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,1,fp8,fp8,0,0.16622933745384216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,1,float16,fp8,0,0.13636266191800436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,1,fp8,fp8,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,1,float16,float16,0,0.13942933082580566
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,1,float16,fp8,0,0.13772799571355185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,1,fp8,fp8,0,0.14780267079671225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,1,float16,float16,0,0.14591466387112936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,1,float16,fp8,0,0.1437013347943624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,1,fp8,fp8,0,0.15752533078193665
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,1,float16,float16,0,0.1469439963499705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,1,float16,fp8,0,0.1437013347943624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,1,fp8,fp8,0,0.15752533078193665
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,1,float16,float16,0,0.09283733367919922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,1,float16,fp8,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,1,float16,float16,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,1,fp8,fp8,0,0.0942080020904541
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,1,float16,fp8,0,0.07612266639868419
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,1,fp8,fp8,0,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,1,float16,float16,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,1,fp8,fp8,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,1,float16,float16,0,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,1,fp8,fp8,0,0.08567466338475545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,1,float16,float16,0,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,1,float16,float16,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,1,fp8,fp8,0,0.08841066559155782
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,1,float16,fp8,0,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,1,float16,float16,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,1,float16,fp8,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,1,float16,fp8,0,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,1,fp8,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,1,float16,float16,0,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,1,float16,fp8,0,0.03448000053564707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,1,float16,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,1,float16,float16,0,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,1,fp8,fp8,0,0.023562667270501454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,1,float16,fp8,0,0.01869333287080129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,1,float16,float16,0,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,1,float16,float16,0,0.01836266616980235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,1,float16,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,1,float16,float16,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,1,float16,float16,0,0.33638934294382733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,1,float16,fp8,0,0.33161065975824994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,1,fp8,fp8,0,0.37461332480112713
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,1,float16,float16,0,0.3408213456471761
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,1,float16,fp8,0,0.3346773386001587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,1,fp8,fp8,0,0.3749599854151408
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,1,float16,fp8,0,0.3466240167617798
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,1,float16,float16,0,0.3551093339920044
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,1,fp8,fp8,0,0.40140799681345624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,1,float16,fp8,0,0.349018653233846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,1,float16,float16,0,0.35072000821431476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,1,fp8,fp8,0,0.4007253249486287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,1,float16,float16,0,0.19882667064666748
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,1,float16,fp8,0,0.19370132684707642
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,1,fp8,fp8,0,0.21811199188232422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,1,float16,float16,0,0.17783466974894205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,1,float16,fp8,0,0.17492800951004028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,1,fp8,fp8,0,0.19642666975657144
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,1,float16,float16,0,0.18005865812301636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,1,float16,fp8,0,0.17647467056910196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,1,fp8,fp8,0,0.19746132691701254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,1,float16,float16,0,0.1867093245188395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,1,float16,fp8,0,0.18244266510009766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,1,fp8,fp8,0,0.20922666788101196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,1,float16,float16,0,0.1893706719080607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,1,float16,fp8,0,0.18414932489395142
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,1,fp8,fp8,0,0.20872533321380615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,1,float16,float16,0,0.1109279990196228
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,1,float16,fp8,0,0.10787199934323628
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,1,fp8,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,1,float16,fp8,0,0.09521599610646565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,1,fp8,fp8,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,1,fp8,fp8,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,1,float16,float16,0,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,1,float16,fp8,0,0.10001066327095032
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,1,fp8,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,1,float16,float16,0,0.10410133004188538
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,1,float16,fp8,0,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,1,fp8,fp8,0,0.11264533797899882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,1,float16,float16,0,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,1,float16,fp8,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,1,float16,float16,0,0.05700799822807312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,1,fp8,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,1,float16,float16,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,1,float16,fp8,0,0.056661332647005715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,1,fp8,fp8,0,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,1,float16,float16,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,1,float16,float16,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,1,float16,fp8,0,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,1,float16,float16,0,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,1,fp8,fp8,0,0.02902399996916453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,1,float16,float16,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,1,fp8,fp8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,1,float16,float16,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,1,float16,float16,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,1,float16,float16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,1,fp8,fp8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,1,float16,float16,0,0.01868266612291336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,1,float16,fp8,0,0.018618666877349217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,1,fp8,fp8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,1,float16,float16,0,0.01798933371901512
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,1,float16,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,1,fp8,fp8,0,0.018207999567190807
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,1,float16,float16,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,1,float16,fp8,0,0.01852799952030182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,1,float16,fp8,0,0.017637333522240322
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,1,float16,fp8,0,0.01852799952030182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,1,float16,fp8,0,0.018181333939234417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,1,float16,float16,0,0.2677813371022542
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,1,float16,fp8,0,0.26742400725682575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,1,fp8,fp8,0,0.30293333530426025
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,1,float16,float16,0,0.2701599995295207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,1,float16,fp8,0,0.2667520046234131
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,1,fp8,fp8,0,0.3015679915746053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,1,float16,float16,0,0.274944007396698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,1,float16,fp8,0,0.2739199995994568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,1,fp8,fp8,0,0.3145386576652527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,1,float16,float16,0,0.274944007396698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,1,float16,fp8,0,0.2759679953257243
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,1,fp8,fp8,0,0.3155626654624939
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,1,float16,float16,0,0.1534293293952942
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,1,float16,fp8,0,0.15308800339698792
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,1,float16,float16,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,1,fp8,fp8,0,0.1742453376452128
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,1,float16,fp8,0,0.14199466506640115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,1,fp8,fp8,0,0.1599146624406179
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,1,float16,float16,0,0.1437013347943624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,1,float16,fp8,0,0.1423520048459371
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,1,fp8,fp8,0,0.15889066457748413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,1,float16,float16,0,0.1479626695315043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,1,float16,fp8,0,0.14575466513633728
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,1,float16,fp8,0,0.046069333950678505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,1,fp8,fp8,0,0.16639999548594156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,1,float16,float16,0,0.14864533146222433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,1,float16,fp8,0,0.14728533228238425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,1,fp8,fp8,0,0.16742932796478271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,1,float16,float16,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,1,fp8,fp8,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,1,float16,float16,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,1,fp8,fp8,0,0.08668800195058186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,1,fp8,fp8,0,0.0867039958635966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,1,float16,float16,0,0.0804319977760315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,1,fp8,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,1,float16,float16,0,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,1,fp8,fp8,0,0.09044800202051799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,1,float16,float16,0,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,1,fp8,fp8,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,1,float16,fp8,0,0.047797332207361855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,1,fp8,fp8,0,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,1,float16,float16,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,1,float16,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,1,fp8,fp8,0,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,1,fp8,fp8,0,0.03480533262093862
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,1,float16,float16,0,0.023562667270501454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,1,float16,float16,0,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,1,fp8,fp8,0,0.02295999974012375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,1,float16,fp8,0,0.022810667753219604
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,1,float16,float16,0,0.02309866746266683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,1,float16,fp8,0,0.02279466638962428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,1,fp8,fp8,0,0.02294933299223582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,1,float16,float16,0,0.018522666146357853
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,1,float16,float16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,1,float16,fp8,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,1,fp8,fp8,0,0.018725333114465077
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,1,float16,float16,0,0.016832000265518825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,1,fp8,fp8,0,0.01859733338157336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,1,float16,fp8,0,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,1,float16,float16,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,1,fp8,fp8,0,0.016645333419243496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,1,float16,fp8,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,1,fp8,fp8,0,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,1,float16,float16,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,1,float16,fp8,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,1,fp8,fp8,0,0.016490666816631954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,1,float16,float16,0,0.01646399994691213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,1,fp8,fp8,0,0.01648533344268799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,1,float16,float16,0,0.2379093368848165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,1,float16,fp8,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,1,fp8,fp8,0,0.2629973292350769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,1,float16,float16,0,0.23756800095240274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,1,float16,fp8,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,1,fp8,fp8,0,0.26316267251968384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,1,float16,float16,0,0.23995733261108398
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,1,float16,fp8,0,0.23824532826741537
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,1,fp8,fp8,0,0.2691413362820943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,1,float16,float16,0,0.23995733261108398
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,1,float16,fp8,0,0.23892800013224283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,1,fp8,fp8,0,0.2725546757380168
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,1,float16,float16,0,0.13174933195114136
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,1,float16,fp8,0,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,1,fp8,fp8,0,0.14779733618100485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,1,float16,float16,0,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,1,float16,fp8,0,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,1,fp8,fp8,0,0.13960533340771994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,1,float16,float16,0,0.12731200456619263
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,1,float16,fp8,0,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,1,fp8,fp8,0,0.13909332950909933
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,1,float16,float16,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,1,float16,fp8,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,1,fp8,fp8,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,1,float16,float16,0,0.12834133704503378
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,1,float16,fp8,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,1,fp8,fp8,0,0.14199466506640115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,1,float16,float16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,1,float16,fp8,0,0.07338133454322815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,1,fp8,fp8,0,0.08020799855391185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,1,float16,float16,0,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,1,float16,float16,0,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,1,float16,fp8,0,0.07201600074768066
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,1,float16,fp8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,1,float16,float16,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,1,float16,float16,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,1,float16,float16,0,0.022175999979178112
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,1,float16,fp8,0,0.02279466638962428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,1,float16,float16,0,0.020741333564122517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,1,float16,fp8,0,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,1,fp8,fp8,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,1,float16,fp8,0,0.022458667556444805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,1,float16,float16,0,0.022458667556444805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,1,float16,fp8,0,0.022197333474953968
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,1,float16,fp8,0,0.01802666609485944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,1,fp8,fp8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,1,float16,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,1,float16,float16,0,0.016645333419243496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,1,fp8,fp8,0,0.01657066618402799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,1,float16,float16,0,11.096762339274088
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,1,float16,fp8,0,11.176629384358725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,1,fp8,fp8,0,8.335701624552408
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,1,float16,float16,0,11.133797963460287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,1,float16,fp8,0,10.4618771870931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,1,fp8,fp8,0,8.367434819539389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,1,float16,float16,0,11.072858174641928
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,1,float16,fp8,0,11.117232004801432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,1,fp8,fp8,0,8.381098429361979
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,1,float16,float16,0,10.603866577148438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,1,float16,fp8,0,10.996565500895182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,1,fp8,fp8,0,8.461653391520182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,1,float16,float16,0,5.216085433959961
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,1,float16,fp8,0,5.227349281311035
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,1,fp8,fp8,0,4.426416079203288
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,1,float16,float16,0,4.973578770955403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,1,float16,float16,0,0.017685333887736004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,1,float16,fp8,0,5.32257080078125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,1,fp8,fp8,0,4.272122701009114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,1,float16,float16,0,4.845909436543782
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,1,fp8,fp8,0,4.283050537109375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,1,float16,float16,0,5.219162623087565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,1,float16,fp8,0,5.064874649047852
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,1,fp8,fp8,0,4.29637336730957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,1,float16,float16,0,4.7936906814575195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,1,fp8,fp8,0,4.313088099161784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,1,float16,fp8,0,5.0624745686848955
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,1,float16,float16,0,2.6279199918111167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,1,float16,fp8,0,2.706432024637858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,1,fp8,fp8,0,2.328575929005941
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,1,float16,float16,0,2.5468533833821616
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,1,fp8,fp8,0,2.2676480611165366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,1,float16,fp8,0,2.6088107426961265
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,1,float16,float16,0,2.5536853472391763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,1,fp8,fp8,0,2.255018711090088
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,1,float16,fp8,0,2.5879999796549478
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,1,float16,float16,0,2.5652853647867837
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,1,float16,fp8,0,2.6234827041625977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,1,fp8,fp8,0,2.264234701792399
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,1,float16,float16,0,2.651477336883545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,1,fp8,fp8,0,2.2748160362243652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,1,float16,float16,0,1.4318933486938477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,1,float16,fp8,0,1.4585173924763997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,1,fp8,fp8,0,1.2835840384165447
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,1,float16,float16,0,1.4173812866210938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,1,float16,fp8,0,1.4011732737223308
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,1,fp8,fp8,0,1.2598613103230794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,1,float16,float16,0,1.4039039611816406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,1,float16,fp8,0,1.425066630045573
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,1,fp8,fp8,0,1.250986655553182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,1,float16,float16,0,1.4199466705322266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,1,float16,fp8,0,1.4332586924235027
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,1,fp8,fp8,0,1.2603786786397297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,1,float16,float16,0,1.4264319737752278
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,1,float16,fp8,0,1.4286452929178874
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,1,fp8,fp8,0,1.2593493461608887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,1,float16,float16,0,5.882709503173828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,1,float16,fp8,0,5.574991861979167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,1,fp8,fp8,0,5.007530530293782
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,1,float16,float16,0,5.710512161254883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,1,float16,fp8,0,5.6282399495442705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,1,fp8,fp8,0,5.022207895914714
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,1,float16,float16,0,6.162256240844727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,1,float16,fp8,0,6.254933039347331
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,1,float16,fp8,0,4.852736155192058
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,1,fp8,fp8,0,5.045248031616211
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,1,float16,float16,0,6.0859731038411455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,1,float16,fp8,0,5.737813313802083
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,1,fp8,fp8,0,5.085866610209147
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,1,float16,float16,0,3.0271145502726235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,1,float16,fp8,0,3.09879461924235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,1,float16,float16,0,2.92301336924235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,1,float16,fp8,0,2.9631147384643555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,1,fp8,fp8,0,2.706432024637858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,1,fp8,fp8,0,2.5837225914001465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,1,float16,float16,0,2.961759885152181
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,1,float16,fp8,0,2.9597012201944985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,1,fp8,fp8,0,2.591909408569336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,1,float16,float16,0,2.9716478983561196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,1,float16,fp8,0,2.974037488301595
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,1,float16,fp8,0,2.679818789164225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,1,fp8,fp8,0,2.6065920193990073
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,1,float16,float16,0,3.0076586405436196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,1,float16,fp8,0,2.994688034057617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,1,fp8,fp8,0,2.6197333335876465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,1,float16,float16,0,1.6358399391174316
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,1,float16,float16,0,1.6027305920918782
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,1,float16,fp8,0,1.5600639979044597
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,1,float16,fp8,0,1.682762622833252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,1,fp8,fp8,0,1.3967413902282715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,1,fp8,fp8,0,1.4535679817199707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,1,float16,float16,0,1.549301306406657
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,1,float16,fp8,0,1.6262772878011067
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,1,fp8,fp8,0,1.3919466336568196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,1,float16,float16,0,1.6133066813151042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,1,float16,fp8,0,1.6092160542805989
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,1,fp8,fp8,0,1.3943467140197754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,1,float16,float16,0,1.6143360137939453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,1,float16,fp8,0,1.5993173917134602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,1,float16,float16,0,0.9326933224995931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,1,fp8,fp8,0,1.402880032857259
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,1,float16,fp8,0,0.9456640084584554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,1,fp8,fp8,0,0.8210773468017578
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,1,float16,float16,0,0.9079466660817465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,1,float16,fp8,0,0.8826879660288492
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,1,fp8,fp8,0,0.7930880387624105
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,1,float16,float16,0,0.912389357884725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,1,float16,fp8,0,0.91102401415507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,1,fp8,fp8,0,0.792405366897583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,1,float16,float16,0,0.8898666699727377
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,1,float16,fp8,0,0.9048639933268229
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,1,fp8,fp8,0,0.7951412995656332
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,1,float16,float16,0,0.9062453111012777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,1,float16,fp8,0,0.927232027053833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,1,fp8,fp8,0,0.8036746978759766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,1,float16,float16,0,4.234922726949056
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,1,float16,fp8,0,4.06715742746989
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,1,fp8,fp8,0,3.659776051839193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,1,float16,float16,0,4.1932799021403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,1,float16,fp8,0,4.213418642679851
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,1,fp8,fp8,0,3.6666027704874673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,1,float16,float16,0,4.14958922068278
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,1,float16,fp8,0,4.3048960367838545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,1,fp8,fp8,0,3.684864044189453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,1,float16,float16,0,4.082357406616211
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,1,float16,fp8,0,4.191232045491536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,1,fp8,fp8,0,3.716949462890625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,1,float16,float16,0,2.299562613169352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,1,float16,float16,0,2.1524480183919272
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,1,float16,fp8,0,2.235904057820638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,1,fp8,fp8,0,1.9013973871866863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,1,float16,fp8,0,2.157909393310547
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,1,fp8,fp8,0,2.004986604054769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,1,float16,float16,0,2.126848061879476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,1,fp8,fp8,0,1.902762730916341
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,1,float16,fp8,0,2.159615993499756
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,1,float16,float16,0,2.187274614969889
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,1,float16,fp8,0,2.1375039418538413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,1,fp8,fp8,0,1.911296049753825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,1,float16,float16,0,2.184879938761393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,1,float16,fp8,0,2.2203680674235025
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,1,fp8,fp8,0,1.9276800155639648
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,1,float16,float16,0,1.2386933167775471
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,1,float16,fp8,0,1.2475679715474446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,1,float16,float16,0,1.1501226425170898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,1,fp8,fp8,0,1.0828800201416016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,1,float16,fp8,0,1.1579786936442058
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,1,fp8,fp8,0,1.0306560198465984
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,1,float16,float16,0,1.1654826800028484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,1,float16,fp8,0,1.2072959740956624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,1,fp8,fp8,0,1.0364480018615723
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,1,float16,float16,0,1.2038826942443848
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,1,float16,fp8,0,1.18886399269104
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,1,fp8,fp8,0,1.0435787041982014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,1,float16,float16,0,1.1732693513234456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,1,float16,fp8,0,1.1816960175832112
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,1,fp8,fp8,0,1.0436267058054607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,1,float16,float16,0,0.7051946322123209
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,1,float16,fp8,0,0.7086079915364584
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,1,float16,float16,0,0.6710613568623861
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,1,float16,fp8,0,0.6843732992808024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,1,fp8,fp8,0,0.6253226598103842
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,1,fp8,fp8,0,0.5980159838994344
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,1,float16,float16,0,0.6655999819437662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,1,float16,fp8,0,0.687445322672526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,1,fp8,fp8,0,0.6014346679051717
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,1,float16,float16,0,0.6833493709564209
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,1,float16,fp8,0,0.6785706679026285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,1,fp8,fp8,0,0.6027946472167969
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,1,float16,float16,0,0.6908586819966634
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,1,float16,fp8,0,0.6908586819966634
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,1,fp8,fp8,0,0.6065599918365479
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,1,float16,float16,0,5.476352055867513
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,1,float16,fp8,0,5.457594553629558
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,1,fp8,fp8,0,4.9510453542073565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,1,float16,float16,0,5.5872853597005205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,1,float16,fp8,0,5.808127721150716
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,1,fp8,fp8,0,4.949674606323242
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,1,float16,float16,0,5.842101414998372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,1,float16,fp8,0,5.5401865641276045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,1,fp8,fp8,0,4.969829241434733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,1,float16,float16,0,5.7221120198567705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,1,float16,fp8,0,5.94644292195638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,1,fp8,fp8,0,5.014016151428223
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,1,float16,float16,0,2.7859627405802407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,1,float16,float16,0,3.0178985595703125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,1,float16,fp8,0,2.786639849344889
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,1,float16,fp8,0,3.0059467951456704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,1,fp8,fp8,0,2.5080533027648926
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,1,fp8,fp8,0,2.668191909790039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,1,float16,float16,0,2.8241920471191406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,1,float16,fp8,0,2.8361387252807617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,1,fp8,fp8,0,2.5192106564839682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,1,float16,float16,0,2.854229291280111
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,1,float16,fp8,0,2.822826703389486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,1,fp8,fp8,0,2.538325309753418
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,1,float16,float16,0,2.9049173990885415
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,1,float16,fp8,0,2.8813654581705728
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,1,float16,float16,0,1.5532372792561848
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,1,fp8,fp8,0,2.5550506909688315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,1,float16,fp8,0,1.6283307075500488
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,1,float16,float16,0,1.4776320457458496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,1,fp8,fp8,0,1.3994666735331218
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,1,float16,fp8,0,1.5035732587178547
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,1,fp8,fp8,0,1.3257386684417725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,1,float16,float16,0,1.4817333221435547
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,1,float16,fp8,0,1.4820693333943684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,1,fp8,fp8,0,1.3230079809824626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,1,float16,float16,0,1.498095989227295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,1,float16,fp8,0,1.525760014851888
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,1,fp8,fp8,0,1.33188263575236
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,1,float16,float16,0,1.497770627339681
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,1,float16,fp8,0,1.5213279724121094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,1,float16,float16,0,0.8683520158131918
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,1,fp8,fp8,0,1.3520213762919109
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,1,float16,fp8,0,0.8656213283538818
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,1,float16,float16,0,0.8207360108693441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,1,float16,fp8,0,0.8360959688822428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,1,fp8,fp8,0,0.7287466526031494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,1,fp8,fp8,0,0.7669760386149088
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,1,float16,float16,0,0.8333653608957926
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,1,float16,fp8,0,0.8381439844767252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,1,fp8,fp8,0,0.7304480075836182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,1,float16,float16,0,0.8181760311126709
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,1,float16,fp8,0,0.8384746710459391
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,1,fp8,fp8,0,0.7393333117167155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,1,float16,float16,0,0.8330240249633789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,1,float16,fp8,0,0.8564053376515707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,1,float16,float16,0,0.5039786497751871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,1,fp8,fp8,0,0.7403519948323568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,1,float16,fp8,0,0.5039786497751871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,1,float16,float16,0,0.4828159809112549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,1,float16,fp8,0,0.4804266691207886
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,1,fp8,fp8,0,0.4514133135477702
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,1,fp8,fp8,0,0.430245320002238
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,1,float16,float16,0,0.4838399887084961
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,1,float16,fp8,0,0.482805331548055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,1,fp8,fp8,0,0.43195732434590656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,1,float16,float16,0,0.4807573159535726
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,1,float16,fp8,0,0.4899786710739136
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,1,fp8,fp8,0,0.4326346715291341
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,1,float16,float16,0,0.49513065814971924
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,1,float16,fp8,0,0.49032533168792725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,1,fp8,fp8,0,0.44048531850179035
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,1,float16,float16,0,3.4263038635253906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,1,float16,fp8,0,3.454634666442871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,1,fp8,fp8,0,3.0752426783243814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,1,float16,float16,0,3.4308694203694663
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,1,float16,fp8,0,3.4669227600097656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,1,fp8,fp8,0,3.0916268030802407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,1,float16,float16,0,3.4092321395874023
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,1,float16,fp8,0,3.4464426040649414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,1,fp8,fp8,0,3.1165440877278647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,1,float16,float16,0,3.5253012975056968
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,1,float16,fp8,0,3.465557416280111
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,1,fp8,fp8,0,3.1554508209228516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,1,float16,float16,0,1.7812533378601074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,1,float16,float16,0,1.8659040133158367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,1,float16,fp8,0,1.9000372886657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,1,float16,fp8,0,1.7546186447143555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,1,fp8,fp8,0,1.5819093386332195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,1,fp8,fp8,0,1.7046186129252117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,1,float16,float16,0,1.7733972867329915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,1,float16,fp8,0,1.7597440083821614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,1,fp8,fp8,0,1.5918080012003581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,1,float16,float16,0,1.7706665992736816
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,1,float16,fp8,0,1.785685380299886
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,1,fp8,fp8,0,1.601029396057129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,1,float16,float16,0,1.825285275777181
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,1,float16,fp8,0,1.7850079536437988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,1,fp8,fp8,0,1.6170667012532551
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,1,float16,float16,0,1.0139306386311848
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,1,float16,fp8,0,1.0040319760640461
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,1,float16,float16,0,0.9675093491872152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,1,float16,fp8,0,0.9678560098012289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,1,fp8,fp8,0,0.9038453102111816
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,1,fp8,fp8,0,0.8462666670481364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,1,float16,float16,0,0.9692160288492838
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,1,fp8,fp8,0,0.8475306828816732
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,1,float16,fp8,0,0.9695413112640381
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,1,float16,float16,0,0.9654613335927328
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,1,float16,fp8,0,0.9777493476867676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,1,fp8,fp8,0,0.8526506423950195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,1,float16,float16,0,0.9784320195515951
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,1,float16,fp8,0,0.97979736328125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,1,float16,float16,0,0.5556906859079996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,1,fp8,fp8,0,0.8663040002187093
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,1,float16,fp8,0,0.570026675860087
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,1,float16,float16,0,0.5273600021998087
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,1,float16,fp8,0,0.5358933210372925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,1,fp8,fp8,0,0.4776906569798787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,1,fp8,fp8,0,0.504319985707601
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,1,float16,float16,0,0.5294080177942911
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,1,float16,fp8,0,0.5403306484222412
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,1,fp8,fp8,0,0.4787253141403198
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,1,float16,fp8,0,0.5355519851048788
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,1,fp8,fp8,0,0.481114665667216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,1,float16,float16,0,0.5396480162938436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,1,float16,float16,0,0.5457866589228312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,1,float16,float16,0,0.3346773386001587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,1,float16,fp8,0,0.5416959921518961
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,1,fp8,fp8,0,0.48520533243815106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,1,float16,fp8,0,0.3415040175120036
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,1,float16,float16,0,0.31591999530792236
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,1,float16,fp8,0,0.3206933339436849
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,1,fp8,fp8,0,0.28893866141637164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,1,fp8,fp8,0,0.30498133103052777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,1,float16,float16,0,0.3169333338737488
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,1,float16,fp8,0,0.3217066725095113
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,1,fp8,fp8,0,0.28893866141637164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,1,float16,float16,0,0.3176106611887614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,1,float16,fp8,0,0.3186453382174174
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,1,fp8,fp8,0,0.29098665714263916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,1,float16,float16,0,0.32614399989446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,1,float16,fp8,0,0.3251146674156189
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,1,fp8,fp8,0,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,1,float16,float16,0,3.631786664326986
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,1,float16,fp8,0,3.5578880310058594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,1,fp8,fp8,0,3.3075199127197266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,1,float16,float16,0,3.590826670328776
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,1,float16,fp8,0,3.5990187327067056
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,1,fp8,fp8,0,3.3262933095296225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,1,float16,float16,0,3.6109654108683267
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,1,float16,fp8,0,3.578197479248047
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,1,fp8,fp8,0,3.3501866658528647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,1,float16,float16,0,3.6875893274943032
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,1,float16,fp8,0,3.6725759506225586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,1,fp8,fp8,0,3.3909759521484375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,1,float16,float16,0,1.7959252993265789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,1,float16,float16,0,2.007040023803711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,1,float16,fp8,0,1.7860266367594402
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,1,float16,fp8,0,1.9575467109680176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,1,fp8,fp8,0,1.8310826619466145
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,1,fp8,fp8,0,1.6679306030273438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,1,float16,float16,0,1.8014453252156575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,1,float16,fp8,0,1.8288639386494954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,1,fp8,fp8,0,1.6800427436828613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,1,float16,float16,0,1.8254507382710774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,1,float16,fp8,0,1.8109386761983235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,1,fp8,fp8,0,1.6984693209330242
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,1,float16,float16,0,1.8848320643107097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,1,float16,fp8,0,1.8604373931884766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,1,float16,float16,0,1.0361173152923584
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,1,fp8,fp8,0,1.718784014383952
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,1,float16,fp8,0,1.0408960183461506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,1,float16,float16,0,0.9647786617279053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,1,float16,fp8,0,0.9514719645182291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,1,fp8,fp8,0,0.9558986822764078
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,1,fp8,fp8,0,0.8778986930847168
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,1,float16,float16,0,0.9593173662821451
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,1,float16,fp8,0,0.9572693506876627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,1,fp8,fp8,0,0.8816640377044678
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,1,float16,float16,0,0.9722879727681478
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,1,float16,fp8,0,0.9784320195515951
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,1,fp8,fp8,0,0.8867839972178141
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,1,float16,float16,0,0.9883306821187338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,1,float16,fp8,0,0.9896960258483887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,1,fp8,fp8,0,0.9031679630279541
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,1,float16,float16,0,0.5529599984486898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,1,float16,fp8,0,0.5683199961980184
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,1,float16,float16,0,0.5234346787134806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,1,float16,fp8,0,0.5189866622289022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,1,fp8,fp8,0,0.4787199894587199
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,1,fp8,fp8,0,0.514901320139567
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,1,float16,float16,0,0.5266773303349813
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,1,float16,fp8,0,0.5207039912541708
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,1,fp8,fp8,0,0.4797439972559611
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,1,float16,float16,0,0.5294080177942911
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,1,float16,fp8,0,0.5345333417256674
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,1,fp8,fp8,0,0.48110934098561603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,1,float16,float16,0,0.5369173288345337
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,1,float16,fp8,0,0.5396586656570435
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,1,fp8,fp8,0,0.4910026788711548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,1,float16,float16,0,0.3179519971211751
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,1,float16,float16,0,0.2984960079193115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,1,float16,fp8,0,0.3258026639620463
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,1,float16,fp8,0,0.29576534032821655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,1,fp8,fp8,0,0.2954240043958028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,1,fp8,fp8,0,0.27357866366704303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,1,float16,float16,0,0.29918400446573895
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,1,float16,fp8,0,0.2961066762606303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,1,fp8,fp8,0,0.2769920031229655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,1,float16,float16,0,0.303274671236674
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,1,float16,fp8,0,0.303274671236674
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,1,fp8,fp8,0,0.27801066637039185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,1,float16,float16,0,0.3063466747601827
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,1,float16,fp8,0,0.3104533354441325
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,1,fp8,fp8,0,0.28279467423756915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,1,float16,float16,0,0.19643733898798624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,1,float16,fp8,0,0.19950934251149496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,1,float16,float16,0,0.18653867642084757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,1,fp8,fp8,0,0.18585066000620523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,1,float16,fp8,0,0.1848319967587789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,1,fp8,fp8,0,0.17356799046198526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,1,float16,float16,0,0.1868799924850464
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,1,float16,fp8,0,0.18944533665974936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,1,fp8,fp8,0,0.17271467049916586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,1,float16,float16,0,0.18569600582122803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,1,float16,fp8,0,0.18978132804234824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,1,fp8,fp8,0,0.17493865887324014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,1,float16,float16,0,0.18824533621470133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,1,float16,fp8,0,0.18875199556350708
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,1,fp8,fp8,0,0.17527467012405396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,1,float16,float16,0,2.331984043121338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,1,float16,fp8,0,2.3350613911946616
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,1,fp8,fp8,0,2.220032056172689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,1,float16,float16,0,2.3425706227620444
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,1,float16,fp8,0,2.340522607167562
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,1,fp8,fp8,0,2.227029323577881
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,1,float16,float16,0,2.3642452557881675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,1,float16,fp8,0,2.3656105995178223
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,1,fp8,fp8,0,2.249903996785482
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,1,float16,float16,0,2.381312052408854
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,1,float16,fp8,0,2.4011093775431314
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,1,fp8,fp8,0,2.2857386271158853
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,1,float16,float16,0,1.190229336420695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,1,float16,float16,0,1.31550399462382
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,1,float16,fp8,0,1.3122453689575195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,1,float16,fp8,0,1.2110400199890137
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,1,fp8,fp8,0,1.250986655553182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,1,fp8,fp8,0,1.1371520360310872
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,1,float16,float16,0,1.2008053461710613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,1,float16,fp8,0,1.2086613178253174
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,1,fp8,fp8,0,1.1422719955444336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,1,float16,float16,0,1.2045653661092122
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,1,float16,fp8,0,1.211733341217041
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,1,fp8,fp8,0,1.1504639784495037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,1,float16,float16,0,1.2219680150349934
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,1,float16,fp8,0,1.2363093694051106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,1,float16,float16,0,0.697002649307251
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,1,fp8,fp8,0,1.166160027186076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,1,float16,fp8,0,0.7034880320231119
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,1,float16,float16,0,0.6398293177286783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,1,float16,fp8,0,0.6357386509577433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,1,fp8,fp8,0,0.6597973505655924
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,1,fp8,fp8,0,0.6014240185419718
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,1,float16,float16,0,0.6456319888432821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,1,float16,fp8,0,0.6370986700057983
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,1,fp8,fp8,0,0.6034773190816244
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,1,float16,float16,0,0.6480213403701782
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,1,float16,fp8,0,0.6507413387298584
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,1,fp8,fp8,0,0.6041599909464518
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,1,float16,float16,0,0.6597919861475626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,1,float16,fp8,0,0.659114678700765
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,1,float16,float16,0,0.3763200044631958
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,1,fp8,fp8,0,0.6164480050404867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,1,float16,fp8,0,0.3869119882583618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,1,float16,float16,0,0.3473066488901774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,1,float16,fp8,0,0.3510613441467285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,1,fp8,fp8,0,0.3592640161514282
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,1,fp8,fp8,0,0.33023999134699505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,1,float16,float16,0,0.35072000821431476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,1,float16,fp8,0,0.35514668623606366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,1,fp8,fp8,0,0.3319466710090637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,1,float16,float16,0,0.3575466473897298
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,1,float16,fp8,0,0.3548213243484497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,1,fp8,fp8,0,0.3333119948705037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,1,float16,float16,0,0.3619840145111084
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,1,float16,fp8,0,0.36538668473561603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,1,float16,float16,0,0.2228906750679016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,1,fp8,fp8,0,0.33911999066670734
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,1,float16,float16,0,0.20497065782546997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,1,float16,fp8,0,0.2025866707166036
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,1,float16,fp8,0,0.2249386707941691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,1,fp8,fp8,0,0.21094399690628052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,1,fp8,fp8,0,0.18995199600855509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,1,float16,float16,0,0.20360533396402994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,1,float16,fp8,0,0.2027519941329956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,1,fp8,fp8,0,0.19114667177200317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,1,float16,float16,0,0.20735466480255127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,1,float16,fp8,0,0.20360533396402994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,1,fp8,fp8,0,0.1949173410733541
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,1,float16,float16,0,0.21077332894007364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,1,float16,fp8,0,0.2126506765683492
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,1,fp8,fp8,0,0.19883199532826742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,1,float16,float16,0,0.13943466544151306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,1,float16,fp8,0,0.14317867159843445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,1,float16,float16,0,0.13260799646377563
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,1,float16,fp8,0,0.13431466619173685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,1,fp8,fp8,0,0.1293706695238749
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,1,fp8,fp8,0,0.1353386640548706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,1,float16,float16,0,0.13329066832860312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,1,float16,fp8,0,0.13499200344085693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,1,fp8,fp8,0,0.12834133704503378
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,1,float16,float16,0,0.13363200426101685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,1,float16,fp8,0,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,1,fp8,fp8,0,0.12833600242932638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,1,float16,float16,0,0.13636266191800436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,1,float16,fp8,0,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,1,fp8,fp8,0,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,1,float16,float16,0,2.7161601384480796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,1,float16,fp8,0,2.7200854619344077
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,1,fp8,fp8,0,2.644986629486084
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,1,float16,float16,0,2.7282721201578775
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,1,float16,fp8,0,2.7356160481770835
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,1,fp8,fp8,0,2.6623892784118652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,1,float16,float16,0,2.788693428039551
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,1,float16,fp8,0,2.781866709391276
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,1,fp8,fp8,0,2.718378702799479
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,1,float16,float16,0,2.774357477823893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,1,float16,fp8,0,2.7767467498779297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,1,fp8,fp8,0,2.7369813919067383
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,1,float16,float16,0,1.3537279764811199
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,1,float16,float16,0,1.4943572680155437
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,1,float16,fp8,0,1.508010705312093
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,1,float16,fp8,0,1.3578240076700847
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,1,fp8,fp8,0,1.4820693333943684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,1,fp8,fp8,0,1.3380266825358074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,1,float16,float16,0,1.3776267369588215
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,1,float16,fp8,0,1.368735949198405
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,1,fp8,fp8,0,1.3390506108601887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,1,float16,float16,0,1.3830827077229817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,1,float16,fp8,0,1.373509407043457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,1,fp8,fp8,0,1.3561174074808757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,1,float16,float16,0,1.401861349741618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,1,float16,fp8,0,1.4062933921813965
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,1,float16,float16,0,0.7662933667500814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,1,fp8,fp8,0,1.3827412923177083
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,1,float16,fp8,0,0.7792747020721436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,1,float16,float16,0,0.7007466952006022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,1,float16,fp8,0,0.7099733352661133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,1,fp8,fp8,0,0.7662933667500814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,1,fp8,fp8,0,0.6881546974182129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,1,float16,float16,0,0.7038293679555258
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,1,float16,fp8,0,0.711680014928182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,1,fp8,fp8,0,0.6929066975911459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,1,float16,float16,0,0.7150932947794596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,1,float16,fp8,0,0.7123733361562093
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,1,fp8,fp8,0,0.7014346917470297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,1,float16,float16,0,0.7263466517130533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,1,float16,fp8,0,0.7287466526031494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,1,fp8,fp8,0,0.7103093465169271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,1,float16,float16,0,0.4113066593805949
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,1,float16,fp8,0,0.41676799456278485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,1,float16,float16,0,0.37495466073354083
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,1,float16,fp8,0,0.37700800100962323
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,1,fp8,fp8,0,0.3671040137608846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,1,fp8,fp8,0,0.40447998046875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,1,float16,float16,0,0.37563733259836835
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,1,float16,fp8,0,0.37939198811848956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,1,fp8,fp8,0,0.3694933255513509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,1,float16,float16,0,0.38178133964538574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,1,float16,fp8,0,0.381440003712972
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,1,fp8,fp8,0,0.37324798107147217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,1,float16,float16,0,0.3858773310979207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,1,float16,fp8,0,0.39031465848286945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,1,fp8,fp8,0,0.377344012260437
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,1,float16,float16,0,0.2300586700439453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,1,float16,float16,0,0.20598934094111124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,1,float16,fp8,0,0.20855466524759927
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,1,float16,fp8,0,0.233130673567454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,1,fp8,fp8,0,0.22459733486175537
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,1,fp8,fp8,0,0.20222934087117514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,1,float16,float16,0,0.20804266134897867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,1,float16,fp8,0,0.20803733666737875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,1,fp8,fp8,0,0.20565332969029745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,1,float16,float16,0,0.20992000897725424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,1,float16,fp8,0,0.21264533201853433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,1,fp8,fp8,0,0.2063360015551249
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,1,float16,float16,0,0.2146986722946167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,1,float16,fp8,0,0.21435733636220297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,1,fp8,fp8,0,0.21094399690628052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,1,float16,float16,0,0.13414399822553
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,1,float16,fp8,0,0.13619200388590494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,1,float16,float16,0,0.12425600488980611
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,1,float16,fp8,0,0.12220266461372375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,1,fp8,fp8,0,0.13379733761151633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,1,fp8,fp8,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,1,float16,float16,0,0.12458667159080505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,1,float16,fp8,0,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,1,fp8,fp8,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,1,float16,float16,0,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,1,float16,fp8,0,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,1,fp8,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,1,float16,float16,0,0.1232266624768575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,1,float16,fp8,0,0.12764267126719156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,1,fp8,fp8,0,0.12458667159080505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,1,float16,float16,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,1,float16,fp8,0,0.0890826682249705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,1,fp8,fp8,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,1,float16,float16,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,1,float16,fp8,0,0.08498666683832805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,1,fp8,fp8,0,0.08124266564846039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,1,float16,float16,0,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,1,float16,fp8,0,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,1,fp8,fp8,0,0.08089066545168559
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,1,float16,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,1,fp8,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,1,float16,float16,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,1,float16,fp8,0,0.08636266986529033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,1,fp8,fp8,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,1,float16,float16,0,1.9078826904296875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,1,float16,fp8,0,1.9109546343485515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,1,fp8,fp8,0,1.8901279767354329
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,1,float16,float16,0,1.9164160092671711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,1,float16,fp8,0,1.908565362294515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,1,fp8,fp8,0,1.9095892906188965
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,1,float16,float16,0,1.9474773406982422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,1,float16,fp8,0,1.944053332010905
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,1,fp8,fp8,0,1.9437227249145508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,1,float16,float16,0,1.9553279876708984
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,1,float16,fp8,0,1.9474773406982422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,1,fp8,fp8,0,1.9594240188598633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,1,float16,float16,0,0.9579520225524902
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,1,float16,float16,0,1.0579626560211182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,1,float16,fp8,0,1.0651360352834065
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,1,float16,fp8,0,0.9589707056681315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,1,fp8,fp8,0,1.070250670115153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,1,fp8,fp8,0,0.9606773058573405
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,1,float16,float16,0,0.9654613335927328
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,1,float16,fp8,0,0.9641013145446777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,1,fp8,fp8,0,0.9644426504770914
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,1,float16,float16,0,0.9739946524302164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,1,float16,fp8,0,0.9753599961598715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,1,fp8,fp8,0,0.9750186602274576
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,1,float16,float16,0,0.9815093676249186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,1,float16,fp8,0,0.9907146294911703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,1,fp8,fp8,0,0.9975466728210449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,1,float16,float16,0,0.5471573273340861
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,1,float16,fp8,0,0.5502293507258097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,1,float16,float16,0,0.4985119899113973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,1,float16,fp8,0,0.49885865052541095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,1,fp8,fp8,0,0.559440016746521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,1,fp8,fp8,0,0.4991999864578247
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,1,float16,float16,0,0.502613345781962
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,1,float16,fp8,0,0.5012480020523071
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,1,fp8,fp8,0,0.5046613216400146
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,1,float16,float16,0,0.501584013303121
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,1,float16,fp8,0,0.508074680964152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,1,fp8,fp8,0,0.5070346593856812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,1,float16,float16,0,0.514901320139567
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,1,float16,fp8,0,0.5142186482747396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,1,float16,float16,0,0.2943999965985616
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,1,fp8,fp8,0,0.5172906716664633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,1,float16,fp8,0,0.3012266755104065
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,1,float16,float16,0,0.26572267214457196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,1,float16,fp8,0,0.2691359917322795
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,1,fp8,fp8,0,0.2698240081469218
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,1,fp8,fp8,0,0.29815467198689777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,1,float16,float16,0,0.2691413362820943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,1,float16,fp8,0,0.26948267221450806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,1,fp8,fp8,0,0.27221866448720294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,1,float16,float16,0,0.27051732937494916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,1,float16,fp8,0,0.2725546757380168
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,1,fp8,fp8,0,0.27460267146428424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,1,float16,float16,0,0.27767467498779297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,1,float16,fp8,0,0.27801599105199176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,1,float16,float16,0,0.16725865999857584
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,1,fp8,fp8,0,0.2797226707140605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,1,float16,fp8,0,0.1704960068066915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,1,fp8,fp8,0,0.16810667514801025
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,1,float16,float16,0,0.14899200201034546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,1,float16,fp8,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,1,fp8,fp8,0,0.14916266997655234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,1,float16,float16,0,0.14814399679501852
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,1,float16,fp8,0,0.1518933375676473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,1,fp8,fp8,0,0.15035733580589294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,1,float16,float16,0,0.15239999691645303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,1,float16,fp8,0,0.15103999773661295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,1,fp8,fp8,0,0.15223466356595358
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,1,float16,float16,0,0.15548266967137656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,1,float16,fp8,0,0.15871999661127725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,1,float16,float16,0,0.1013759970664978
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,1,float16,fp8,0,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,1,fp8,fp8,0,0.15803733468055725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,1,float16,float16,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,1,fp8,fp8,0,0.09113599856694539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,1,float16,fp8,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,1,fp8,fp8,0,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,1,float16,float16,0,0.09353066484133403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,1,fp8,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,1,float16,float16,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,1,float16,fp8,0,0.09557333588600159
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,1,fp8,fp8,0,0.09147733449935913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,1,float16,float16,0,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,1,float16,fp8,0,0.09728533029556274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,1,fp8,fp8,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,1,float16,float16,0,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,1,fp8,fp8,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,1,float16,fp8,0,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,1,fp8,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,1,float16,float16,0,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,1,float16,fp8,0,0.06180266539255778
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,1,fp8,fp8,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,1,float16,fp8,0,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,1,fp8,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,1,float16,float16,0,0.06313600142796834
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,1,fp8,fp8,0,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,1,float16,float16,0,2.0879359245300293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,1,float16,fp8,0,2.079066594441732
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,1,fp8,fp8,0,2.2029600143432617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,1,float16,float16,0,2.1039786338806152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,1,float16,fp8,0,2.112170696258545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,1,fp8,fp8,0,2.330629348754883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,1,float16,float16,0,2.1398186683654785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,1,float16,fp8,0,2.153477350870768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,1,fp8,fp8,0,2.322773297627767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,1,float16,float16,0,2.2196906407674155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,1,float16,fp8,0,2.2152320543924966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,1,fp8,fp8,0,2.3751680056254068
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,1,float16,float16,0,1.0586453278859456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,1,float16,float16,0,1.179136037826538
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,1,float16,fp8,0,1.0562559763590496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,1,float16,fp8,0,1.1548853715260823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,1,fp8,fp8,0,1.242799997329712
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,1,fp8,fp8,0,1.1142826875050862
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,1,float16,float16,0,1.068890651067098
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,1,float16,fp8,0,1.0637653668721516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,1,fp8,fp8,0,1.1535306771596272
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,1,float16,float16,0,1.0791146755218506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,1,float16,fp8,0,1.074005365371704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,1,fp8,fp8,0,1.1450026830037434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,1,float16,float16,0,1.1071093082427979
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,1,float16,fp8,0,1.0934613545735676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,1,float16,float16,0,0.6010933319727579
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,1,fp8,fp8,0,1.1816960175832112
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,1,float16,fp8,0,0.5922240018844604
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,1,float16,float16,0,0.5396480162938436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,1,float16,fp8,0,0.5396480162938436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,1,fp8,fp8,0,0.6326613426208496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,1,fp8,fp8,0,0.5676373243331909
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,1,float16,float16,0,0.5444213151931763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,1,float16,fp8,0,0.5475039879480997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,1,fp8,fp8,0,0.5765120188395182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,1,float16,float16,0,0.5488586823145548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,1,float16,fp8,0,0.5492053429285685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,1,fp8,fp8,0,0.5840160051981608
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,1,float16,float16,0,0.5614933172861735
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,1,float16,fp8,0,0.5577386617660522
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,1,float16,float16,0,0.3169333338737488
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,1,fp8,fp8,0,0.5905119975407919
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,1,float16,float16,0,0.284496009349823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,1,float16,fp8,0,0.3097599943478902
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,1,float16,fp8,0,0.281765341758728
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,1,fp8,fp8,0,0.32922667264938354
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,1,fp8,fp8,0,0.2954240043958028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,1,float16,float16,0,0.28484266996383667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,1,float16,fp8,0,0.2855253418286641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,1,fp8,fp8,0,0.2985066572825114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,1,float16,float16,0,0.28962133328119916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,1,float16,fp8,0,0.2872320016225179
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,1,fp8,fp8,0,0.3015679915746053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,1,float16,float16,0,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,1,float16,float16,0,0.17186667521794638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,1,float16,fp8,0,0.2937279939651489
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,1,fp8,fp8,0,0.3080480098724365
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,1,float16,float16,0,0.1534293293952942
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,1,float16,fp8,0,0.15240533153216043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,1,float16,fp8,0,0.16827734311421713
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,1,fp8,fp8,0,0.17561600605646768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,1,fp8,fp8,0,0.15751999616622925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,1,float16,float16,0,0.15292267004648843
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,1,float16,fp8,0,0.1525920033454895
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,1,fp8,fp8,0,0.15889066457748413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,1,float16,float16,0,0.1565013329188029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,1,float16,fp8,0,0.1544533371925354
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,1,fp8,fp8,0,0.16230400403340658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,1,float16,float16,0,0.1585493286450704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,1,float16,fp8,0,0.15888532996177673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,1,fp8,fp8,0,0.16537599762280783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,1,float16,float16,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,1,float16,float16,0,0.08797333637873332
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,1,float16,fp8,0,0.09678933024406433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,1,fp8,fp8,0,0.10172266761461894
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,1,float16,fp8,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,1,fp8,fp8,0,0.08738133311271667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,1,float16,float16,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,1,float16,fp8,0,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,1,fp8,fp8,0,0.08738133311271667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,1,float16,float16,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,1,float16,fp8,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,1,fp8,fp8,0,0.08806932965914409
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,1,float16,float16,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,1,float16,fp8,0,0.09012266993522644
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,1,fp8,fp8,0,0.08975999553998311
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,1,float16,float16,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,1,float16,fp8,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,1,float16,float16,0,0.05665066838264465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,1,fp8,fp8,0,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,1,fp8,fp8,0,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,1,float16,float16,0,0.056314667065938316
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,1,float16,fp8,0,0.05596800148487091
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,1,fp8,fp8,0,0.05530133346716563
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,1,float16,float16,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,1,float16,fp8,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,1,fp8,fp8,0,0.05495999753475189
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,1,float16,float16,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,1,fp8,fp8,0,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,1,float16,float16,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,1,fp8,fp8,0,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,1,float16,float16,0,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,1,float16,float16,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,1,float16,fp8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,1,fp8,fp8,0,0.03789333254098892
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,1,float16,float16,0,1.8245654106140137
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,1,float16,fp8,0,1.8228905995686848
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,1,fp8,fp8,0,1.9681226412455242
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,1,float16,float16,0,1.8860373497009277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,1,float16,fp8,0,1.8966186841328938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,1,fp8,fp8,0,2.089301268259684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,1,float16,float16,0,1.895253340403239
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,1,float16,fp8,0,1.9041226704915364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,1,fp8,fp8,0,2.1340160369873047
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,1,float16,float16,0,1.9682879447937012
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,1,float16,fp8,0,1.9512267112731934
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,1,fp8,fp8,0,2.128554662068685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,1,float16,float16,0,0.9241600036621094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,1,float16,float16,0,1.0446453094482422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,1,float16,fp8,0,0.9214186668395996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,1,float16,fp8,0,1.0255359808603923
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,1,fp8,fp8,0,1.134768009185791
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,1,fp8,fp8,0,0.9920852979024252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,1,float16,float16,0,0.936789353688558
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,1,float16,fp8,0,0.9371360143025717
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,1,fp8,fp8,0,1.0388320287068684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,1,float16,float16,0,0.9480533599853516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,1,fp8,fp8,0,1.0289493401845295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,1,float16,fp8,0,0.942255973815918
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,1,float16,float16,0,0.9729706446329752
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,1,float16,fp8,0,0.962394634882609
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,1,float16,float16,0,0.535210649172465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,1,fp8,fp8,0,1.0685439904530842
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,1,float16,float16,0,0.47121067841847736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,1,float16,fp8,0,0.5234293142954508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,1,float16,fp8,0,0.47360531489054364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,1,fp8,fp8,0,0.5724159876505533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,1,fp8,fp8,0,0.5073920090993246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,1,float16,float16,0,0.4814506769180298
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,1,float16,fp8,0,0.4787199894587199
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,1,fp8,fp8,0,0.5179680188496908
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,1,float16,float16,0,0.48315731684366864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,1,float16,fp8,0,0.48349865277608234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,1,fp8,fp8,0,0.5237760146458944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,1,float16,float16,0,0.49373865127563477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,1,float16,fp8,0,0.49134933948516846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,1,fp8,fp8,0,0.5355519851048788
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,1,float16,float16,0,0.28006933132807416
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,1,float16,fp8,0,0.2739199995994568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,1,float16,float16,0,0.2481493353843689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,1,float16,fp8,0,0.2474666635195414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,1,fp8,fp8,0,0.2978079915046692
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,1,fp8,fp8,0,0.26401599248250324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,1,float16,float16,0,0.2505439917246501
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,1,float16,fp8,0,0.24883200724919638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,1,fp8,fp8,0,0.2677759925524394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,1,float16,float16,0,0.25190399090449017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,1,float16,fp8,0,0.2529173294703166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,1,fp8,fp8,0,0.2701653242111206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,1,float16,float16,0,0.25941334168116253
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,1,float16,fp8,0,0.2563413381576538
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,1,float16,float16,0,0.1508639951546987
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,1,fp8,fp8,0,0.2783520023028056
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,1,float16,float16,0,0.13209600249926248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,1,float16,fp8,0,0.13516799608866373
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,1,float16,fp8,0,0.14882133404413858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,1,fp8,fp8,0,0.16025599837303162
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,1,fp8,fp8,0,0.13893333077430725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,1,float16,float16,0,0.1346560021241506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,1,float16,fp8,0,0.13362666964530945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,1,fp8,fp8,0,0.14284800489743552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,1,float16,float16,0,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,1,float16,fp8,0,0.1353386640548706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,1,fp8,fp8,0,0.14591999848683676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,1,float16,float16,0,0.1389173368612925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,1,float16,fp8,0,0.1389226714769999
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,1,fp8,fp8,0,0.14985066652297974
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,1,float16,float16,0,0.08533333738644917
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,1,float16,fp8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,1,float16,float16,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,1,fp8,fp8,0,0.09181867043177287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,1,float16,fp8,0,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,1,fp8,fp8,0,0.07747733096281688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,1,float16,float16,0,0.07714133461316426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,1,float16,fp8,0,0.07611200213432312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,1,float16,float16,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,1,float16,fp8,0,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,1,fp8,fp8,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,1,float16,float16,0,0.07713599999745686
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,1,float16,fp8,0,0.07714133461316426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,1,fp8,fp8,0,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,1,float16,float16,0,0.05186133086681366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,1,float16,fp8,0,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,1,float16,float16,0,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,1,fp8,fp8,0,0.05187733471393585
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,1,float16,fp8,0,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,1,fp8,fp8,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,1,float16,float16,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,1,fp8,fp8,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,1,fp8,fp8,0,0.05052266518274943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,1,float16,float16,0,0.049839998284975685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,1,float16,fp8,0,0.05086400111516317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,1,fp8,fp8,0,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,1,float16,float16,0,0.032074667513370514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,1,float16,float16,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,1,fp8,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,1,float16,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,1,fp8,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,1,float16,float16,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,1,float16,fp8,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,1,fp8,fp8,0,0.027664000789324444
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,1,fp8,fp8,0,0.027984000742435455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,1,float16,float16,0,0.03073066721359889
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,1,float16,fp8,0,0.029029332101345062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,1,float16,float16,0,0.6942666371663412
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,1,float16,fp8,0,0.6925600369771322
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,1,fp8,fp8,0,0.7294293244679769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,1,float16,float16,0,0.7069013118743896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,1,float16,fp8,0,0.7045119603474935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,1,fp8,fp8,0,0.7686879634857178
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,1,float16,float16,0,0.718506654103597
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,1,float16,fp8,0,0.7130453586578369
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,1,fp8,fp8,0,0.7669760386149088
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,1,float16,float16,0,0.7461547056833903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,1,float16,fp8,0,0.7338666915893555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,1,fp8,fp8,0,0.8162986437479655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,1,float16,float16,0,0.35788798332214355
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,1,float16,float16,0,0.4288906653722127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,1,float16,fp8,0,0.41779200236002606
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,1,float16,fp8,0,0.356879989306132
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,1,fp8,fp8,0,0.442197322845459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,1,fp8,fp8,0,0.3739306529362996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,1,float16,float16,0,0.3643626769383748
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,1,float16,fp8,0,0.36164267857869464
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,1,fp8,fp8,0,0.3814293146133423
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,1,float16,float16,0,0.3729120095570882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,1,float16,fp8,0,0.36881065368652344
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,1,fp8,fp8,0,0.3930506706237793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,1,float16,float16,0,0.3834880193074544
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,1,float16,fp8,0,0.3800746599833171
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,1,float16,float16,0,0.2310826579729716
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,1,fp8,fp8,0,0.4055039882659912
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,1,float16,fp8,0,0.22254933913548788
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,1,float16,float16,0,0.1909760038057963
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,1,fp8,fp8,0,0.23414933681488037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,1,float16,fp8,0,0.19285333156585693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,1,fp8,fp8,0,0.19709867238998413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,1,float16,float16,0,0.19473065932591757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,1,float16,fp8,0,0.19490132729212442
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,1,fp8,fp8,0,0.2039466698964437
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,1,float16,float16,0,0.1986400087674459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,1,float16,fp8,0,0.1971199909845988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,1,fp8,fp8,0,0.20684800545374551
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,1,float16,float16,0,0.20462934176127115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,1,float16,fp8,0,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,1,float16,float16,0,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,1,fp8,fp8,0,0.21230934063593546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,1,float16,fp8,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,1,float16,fp8,0,0.10786666472752889
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,1,fp8,fp8,0,0.12834133704503378
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,1,fp8,fp8,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,1,float16,float16,0,0.10889066259066264
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,1,float16,fp8,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,1,fp8,fp8,0,0.11161067088445027
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,1,float16,float16,0,0.10956799983978271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,1,float16,fp8,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,1,fp8,fp8,0,0.11332799990971883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,1,float16,float16,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,1,float16,fp8,0,0.11195199688275655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,1,fp8,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,1,fp8,fp8,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,1,float16,fp8,0,0.06484800080458324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,1,float16,float16,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,1,fp8,fp8,0,0.06451733410358429
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,1,float16,float16,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,1,fp8,fp8,0,0.06622399886449178
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,1,float16,fp8,0,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,1,float16,float16,0,0.044362664222717285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,1,fp8,fp8,0,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,1,fp8,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,1,float16,float16,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,1,float16,fp8,0,0.043338666359583534
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,1,fp8,fp8,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,1,float16,float16,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,1,fp8,fp8,0,0.030389333764712017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,1,float16,float16,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,1,float16,fp8,0,0.025605333348115284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,1,fp8,fp8,0,0.025946666797002155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,1,float16,float16,0,0.3797333240509033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,1,float16,fp8,0,0.37837334473927814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,1,fp8,fp8,0,0.39662933349609375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,1,float16,float16,0,0.3869119882583618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,1,float16,fp8,0,0.3851999839146932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,1,fp8,fp8,0,0.40379734834035236
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,1,float16,float16,0,0.389631986618042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,1,float16,fp8,0,0.38894931475321454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,1,fp8,fp8,0,0.4160799980163574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,1,float16,float16,0,0.4020906686782837
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,1,float16,fp8,0,0.39867734909057617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,1,fp8,fp8,0,0.43299734592437744
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,1,float16,float16,0,0.23449599742889404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,1,float16,float16,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,1,float16,fp8,0,0.22971733411153158
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,1,float16,float16,0,0.19950934251149496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,1,fp8,fp8,0,0.24268800020217896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,1,float16,fp8,0,0.2005280057589213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,1,fp8,fp8,0,0.2056480050086975
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,1,float16,float16,0,0.20224533478418985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,1,float16,fp8,0,0.20342934131622314
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,1,fp8,fp8,0,0.212991992632548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,1,float16,float16,0,0.20377600193023682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,1,float16,fp8,0,0.2032639980316162
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,1,fp8,fp8,0,0.2160586714744568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,1,float16,float16,0,0.21145067612330118
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,1,fp8,fp8,0,0.2225386699040731
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,1,float16,float16,0,0.12868266304334006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,1,float16,fp8,0,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,1,float16,float16,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,1,fp8,fp8,0,0.13312533497810364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,1,float16,fp8,0,0.10992532968521118
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,1,fp8,fp8,0,0.11025066177050273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,1,float16,float16,0,0.11128000418345134
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,1,float16,fp8,0,0.11128000418345134
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,1,fp8,fp8,0,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,1,float16,float16,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,1,float16,float16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,1,float16,fp8,0,0.11126933495203654
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,1,fp8,fp8,0,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,1,float16,float16,0,0.11674132943153381
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,1,float16,fp8,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,1,fp8,fp8,0,0.12116799751917522
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,1,float16,float16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,1,float16,float16,0,0.06554133196671803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,1,fp8,fp8,0,0.06246933341026306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,1,float16,fp8,0,0.06553066770235698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,1,fp8,fp8,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,1,float16,float16,0,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,1,float16,float16,0,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,1,float16,fp8,0,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,1,fp8,fp8,0,0.06756799916426341
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,1,float16,float16,0,0.043696001172065735
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,1,float16,float16,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,1,float16,fp8,0,0.04472533365090688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,1,fp8,fp8,0,0.045040001471837364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,1,float16,fp8,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,1,float16,float16,0,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,1,float16,fp8,0,0.04266133407751719
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,1,float16,float16,0,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,1,float16,float16,0,0.029701332251230877
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,1,fp8,fp8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,1,float16,float16,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,1,float16,float16,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,1,float16,fp8,0,0.018858666221300762
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,1,float16,float16,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,1,float16,fp8,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,1,fp8,fp8,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,1,float16,float16,0,0.25702399015426636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,1,float16,fp8,0,0.25804799795150757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,1,fp8,fp8,0,0.28074665864308673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,1,float16,float16,0,0.26077866554260254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,1,float16,fp8,0,0.2597493330637614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,1,fp8,fp8,0,0.285861333211263
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,1,float16,float16,0,0.26077866554260254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,1,float16,fp8,0,0.26146133740743
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,1,fp8,fp8,0,0.29064534107844037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,1,float16,float16,0,0.2674400011698405
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,1,float16,fp8,0,0.2653973301251729
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,1,fp8,fp8,0,0.29576534032821655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,1,float16,float16,0,0.15445866187413534
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,1,float16,fp8,0,0.15240533153216043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,1,float16,float16,0,0.13875733812650046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,1,fp8,fp8,0,0.17015999555587769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,1,float16,fp8,0,0.13943466544151306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,1,fp8,fp8,0,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,1,float16,float16,0,0.13942933082580566
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,1,float16,fp8,0,0.13773866494496664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,1,fp8,fp8,0,0.1513813336690267
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,1,float16,float16,0,0.13942399621009827
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,1,float16,fp8,0,0.14080533385276794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,1,fp8,fp8,0,0.1544533371925354
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,1,float16,float16,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,1,float16,fp8,0,0.1431946655114492
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,1,float16,float16,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,1,fp8,fp8,0,0.15940266847610474
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,1,float16,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,1,float16,fp8,0,0.07817066709200542
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,1,float16,fp8,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,1,fp8,fp8,0,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,1,fp8,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,1,float16,fp8,0,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,1,fp8,fp8,0,0.08567466338475545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,1,fp8,fp8,0,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,1,float16,float16,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,1,float16,float16,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,1,float16,fp8,0,0.049141332507133484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,1,float16,fp8,0,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,1,float16,float16,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,1,float16,fp8,0,0.03311999887228012
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,1,float16,fp8,0,0.03346133232116699
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,1,float16,fp8,0,0.20923733711242676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,1,float16,float16,0,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,1,float16,float16,0,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,1,fp8,fp8,0,0.0184906671444575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,1,float16,fp8,0,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,1,float16,float16,0,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,1,float16,fp8,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,1,fp8,fp8,0,0.017690667261679966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,1,float16,float16,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,1,float16,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,1,fp8,fp8,0,0.017674667139848072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,1,fp8,fp8,0,0.01664000004529953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,1,float16,float16,0,0.20836800336837769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,1,float16,fp8,0,0.2053119937578837
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,1,fp8,fp8,0,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,1,float16,float16,0,0.20669333140055338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,1,float16,fp8,0,0.20684266090393066
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,1,fp8,fp8,0,0.23006399472554526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,1,float16,float16,0,0.20906666914621988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,1,float16,fp8,0,0.20736000935236612
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,1,fp8,fp8,0,0.23278933763504028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,1,float16,float16,0,0.21077332894007364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,1,float16,fp8,0,0.21009600162506104
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,1,fp8,fp8,0,0.23825067281723022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,1,float16,float16,0,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,1,float16,fp8,0,0.11878400047620137
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,1,fp8,fp8,0,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,1,float16,float16,0,0.11195733149846394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,1,fp8,fp8,0,0.12049600481987
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,1,float16,float16,0,0.11229866743087769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,1,fp8,fp8,0,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,1,float16,float16,0,0.11161067088445027
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,1,fp8,fp8,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,1,float16,float16,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,1,float16,fp8,0,0.1129866639773051
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,1,fp8,fp8,0,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,1,float16,float16,0,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,1,fp8,fp8,0,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,1,float16,float16,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,1,fp8,fp8,0,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,1,float16,float16,0,0.06449600060780843
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,1,fp8,fp8,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,1,float16,float16,0,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,1,float16,float16,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,1,float16,float16,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,1,fp8,fp8,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,1,float16,float16,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,1,float16,float16,0,0.03038399914900462
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,1,float16,fp8,0,0.021514666577180225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,1,fp8,fp8,0,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,1,float16,float16,0,0.02149333308140437
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,1,float16,float16,0,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,1,fp8,fp8,0,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,1,float16,float16,0,0.017349333812793095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,1,float16,float16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,1,fp8,fp8,0,0.018378666291634243
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,1,float16,float16,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,1,fp8,fp8,0,0.016645333419243496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,1,float16,float16,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,1,float16,float16,0,0.1803893248240153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,1,float16,fp8,0,0.1807360053062439
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,1,fp8,fp8,0,0.1998400092124939
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,1,float16,float16,0,0.1807360053062439
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,1,float16,fp8,0,0.1807360053062439
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,1,fp8,fp8,0,0.20087466637293497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,1,float16,float16,0,0.1824373404184977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,1,float16,fp8,0,0.18074132998784384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,1,fp8,fp8,0,0.2027519941329956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,1,float16,float16,0,0.18284799655278525
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,1,float16,fp8,0,0.18244266510009766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,1,fp8,fp8,0,0.20616533358891806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,1,float16,float16,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,1,float16,fp8,0,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,1,float16,float16,0,0.0986400047938029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,1,float16,fp8,0,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,1,fp8,fp8,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,1,float16,fp8,0,0.0986400047938029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,1,fp8,fp8,0,0.10778133074442546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,1,float16,float16,0,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,1,float16,fp8,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,1,fp8,fp8,0,0.1088800032933553
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,1,float16,float16,0,0.09898133079210918
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,1,float16,fp8,0,0.0993173321088155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,1,fp8,fp8,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,1,float16,float16,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,1,fp8,fp8,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,1,fp8,fp8,0,0.062133332093556724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,1,fp8,fp8,0,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,1,float16,float16,0,0.05904533465703329
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,1,float16,fp8,0,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,1,fp8,fp8,0,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,1,float16,fp8,0,0.038917332887649536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,1,float16,fp8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,1,float16,fp8,0,0.026288000245889027
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,1,fp8,fp8,0,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,1,float16,fp8,0,0.021733333667119343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,1,float16,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,1,float16,fp8,0,0.018320000420014065
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,1,float16,float16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,1,float16,fp8,0,0.01836266616980235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,1,fp8,fp8,0,0.017978666971127193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,1,fp8,fp8,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,1,float16,float16,0,6.683477401733398
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,1,float16,fp8,0,6.303402582804362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,1,fp8,fp8,0,5.571584065755208
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,1,float16,float16,0,6.9259999593098955
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,1,float16,fp8,0,6.924293518066406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,1,fp8,fp8,0,5.597525278727214
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,1,float16,float16,0,6.732287724812825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,1,float16,fp8,0,6.9225813547770185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,1,fp8,fp8,0,5.620053609212239
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,1,float16,float16,0,6.612138748168945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,1,float16,fp8,0,7.005872090657552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,1,fp8,fp8,0,5.673983891805013
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,1,float16,float16,0,3.473408063252767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,1,float16,fp8,0,3.4566825230916343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,1,fp8,fp8,0,2.987349192301432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,1,float16,float16,0,3.3396053314208984
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,1,float16,fp8,0,3.412480036417643
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,1,fp8,fp8,0,2.871642748514811
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,1,float16,float16,0,3.315199851989746
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,1,float16,fp8,0,3.330560048421224
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,1,fp8,fp8,0,2.887338638305664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,1,float16,float16,0,3.3544534047444663
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,1,float16,fp8,0,3.396437327067057
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,1,fp8,fp8,0,2.8991146087646484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,1,float16,float16,0,3.3704961140950522
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,1,float16,fp8,0,3.3824427922566733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,1,fp8,fp8,0,2.9315414428710938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,1,float16,float16,0,1.8189652760823567
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,1,float16,fp8,0,1.8370614051818848
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,1,float16,float16,0,1.7525919278462727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,1,fp8,fp8,0,1.5842986106872559
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,1,float16,fp8,0,1.7385813395182292
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,1,fp8,fp8,0,1.5400853157043457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,1,float16,float16,0,1.7583786646525066
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,1,float16,fp8,0,1.778175989786784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,1,fp8,fp8,0,1.5431733131408691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,1,float16,float16,0,1.7652053833007812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,1,float16,fp8,0,1.7880746523539226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,1,fp8,fp8,0,1.553749402364095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,1,float16,float16,0,1.7566720644632976
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,1,float16,fp8,0,1.815733273824056
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,1,float16,float16,0,1.0282666683197021
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,1,fp8,fp8,0,1.5629653930664062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,1,float16,fp8,0,1.0251946449279785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,1,fp8,fp8,0,0.8946346441904703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,1,float16,float16,0,1.0088106791178386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,1,float16,fp8,0,1.010858694712321
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,1,fp8,fp8,0,0.8724533716837565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,1,float16,float16,0,1.0016533533732097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,1,float16,fp8,0,1.0228053728739421
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,1,fp8,fp8,0,0.8809813658396403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,1,float16,float16,0,1.0036853154500325
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,1,float16,fp8,0,1.0207573572794597
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,1,fp8,fp8,0,0.8802986939748129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,1,float16,float16,0,1.0251946449279785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,1,float16,fp8,0,1.0169973373413086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,1,fp8,fp8,0,0.8847359816233317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,1,float16,float16,0,3.8845386505126953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,1,float16,fp8,0,3.7934080759684243
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,1,fp8,fp8,0,3.348479906717936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,1,float16,float16,0,4.005541483561198
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,1,float16,fp8,0,3.963904062906901
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,1,fp8,fp8,0,3.3762985865275064
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,1,float16,float16,0,3.8046773274739585
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,1,float16,fp8,0,3.944789250691732
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,1,fp8,fp8,0,3.390634536743164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,1,float16,float16,0,4.0321652094523115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,1,float16,fp8,0,3.937450726826986
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,1,fp8,fp8,0,3.427333196004232
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,1,float16,float16,0,2.154143969217936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,1,float16,float16,0,2.2632106145222983
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,1,float16,fp8,0,2.146986643473307
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,1,float16,fp8,0,2.013866742451986
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,1,fp8,fp8,0,1.8286933898925781
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,1,fp8,fp8,0,1.747114658355713
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,1,float16,float16,0,1.99236265818278
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,1,float16,fp8,0,2.086906592051188
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,1,fp8,fp8,0,1.7549653053283691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,1,float16,float16,0,2.007040023803711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,1,float16,fp8,0,2.016597270965576
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,1,fp8,fp8,0,1.772714614868164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,1,float16,float16,0,2.0008959770202637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,1,float16,fp8,0,2.0607946713765464
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,1,float16,float16,0,1.154901345570882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,1,fp8,fp8,0,1.7850027084350586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,1,float16,float16,0,1.12008531888326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,1,float16,fp8,0,1.1252000331878662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,1,float16,fp8,0,1.0774346987406414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,1,fp8,fp8,0,0.9924266338348389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,1,fp8,fp8,0,0.9582933584849039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,1,float16,float16,0,1.1009706656138103
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,1,float16,fp8,0,1.117695967356364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,1,fp8,fp8,0,0.9583040078481039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,1,float16,float16,0,1.1282827059427898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,1,float16,fp8,0,1.1095093091328938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,1,fp8,fp8,0,0.9678506851196289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,1,float16,float16,0,1.113258679707845
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,1,float16,fp8,0,1.1409066518147786
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,1,float16,float16,0,0.6538240114847819
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,1,fp8,fp8,0,0.9702346324920654
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,1,float16,fp8,0,0.659114678700765
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,1,float16,float16,0,0.6343626578648885
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,1,float16,fp8,0,0.6263466676076254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,1,fp8,fp8,0,0.5771946509679159
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,1,fp8,fp8,0,0.5556906859079996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,1,float16,float16,0,0.6370986700057983
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,1,float16,fp8,0,0.6297599871953329
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,1,fp8,fp8,0,0.5597866773605347
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,1,float16,float16,0,0.6384533246358236
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,1,float16,fp8,0,0.6432373523712158
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,1,fp8,fp8,0,0.5604693492253622
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,1,float16,float16,0,0.6507519880930582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,1,float16,fp8,0,0.6493866840998331
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,1,fp8,fp8,0,0.570026675860087
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,1,float16,float16,0,2.7450027465820312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,1,float16,fp8,0,2.8149760564168296
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,1,fp8,fp8,0,2.447872002919515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,1,float16,float16,0,2.817706743876139
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,1,float16,fp8,0,2.7607040405273438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,1,fp8,fp8,0,2.4676693280537925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,1,float16,float16,0,2.8108800252278647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,1,float16,fp8,0,2.8142932256062827
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,1,fp8,fp8,0,2.474127928415934
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,1,float16,float16,0,2.8767573038736978
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,1,float16,fp8,0,2.8619041442871094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,1,fp8,fp8,0,2.517162640889486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,1,float16,float16,0,1.543168067932129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,1,float16,float16,0,1.4537439346313477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,1,float16,fp8,0,1.5472639401753743
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,1,float16,fp8,0,1.4858239491780598
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,1,fp8,fp8,0,1.3509972890218098
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,1,fp8,fp8,0,1.2965546449025471
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,1,float16,float16,0,1.4742132822672527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,1,float16,fp8,0,1.4469119707743328
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,1,fp8,fp8,0,1.291434685389201
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,1,float16,float16,0,1.516208012898763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,1,float16,fp8,0,1.534981409708659
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,1,fp8,fp8,0,1.3020213445027669
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,1,float16,float16,0,1.4848000208536785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,1,float16,fp8,0,1.5332694053649902
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,1,float16,float16,0,0.8403626283009847
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,1,fp8,fp8,0,1.3253973325093586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,1,float16,fp8,0,0.8574293454488119
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,1,float16,float16,0,0.8108373483022054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,1,float16,fp8,0,0.8279093106587728
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,1,fp8,fp8,0,0.7475199699401855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,1,fp8,fp8,0,0.7137280305226644
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,1,float16,float16,0,0.814250628153483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,1,float16,fp8,0,0.8326826890309652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,1,fp8,fp8,0,0.719866673151652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,1,float16,float16,0,0.8237973054250082
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,1,float16,fp8,0,0.816981315612793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,1,fp8,fp8,0,0.7215786774953207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,1,float16,float16,0,0.8364426294962565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,1,float16,float16,0,0.49715201059977215
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,1,float16,fp8,0,0.8381333351135254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,1,fp8,fp8,0,0.7284053166707357
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,1,float16,fp8,0,0.49988265832265216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,1,float16,float16,0,0.4742826620737712
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,1,float16,fp8,0,0.4715520143508911
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,1,fp8,fp8,0,0.42342400550842285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,1,fp8,fp8,0,0.4411733150482178
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,1,float16,float16,0,0.4773546854654948
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,1,float16,fp8,0,0.4742879867553711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,1,fp8,fp8,0,0.42683732509613037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,1,float16,float16,0,0.48420266310373944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,1,float16,fp8,0,0.48212798436482746
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,1,fp8,fp8,0,0.42718935012817383
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,1,float16,float16,0,0.48930132389068604
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,1,float16,fp8,0,0.4920426607131958
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,1,fp8,fp8,0,0.4333226680755615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,1,float16,float16,0,3.614042599995931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,1,float16,fp8,0,3.708415985107422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,1,fp8,fp8,0,3.2877171834309897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,1,float16,float16,0,3.7826560338338218
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,1,float16,fp8,0,3.67359987894694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,1,fp8,fp8,0,3.3037707010904946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,1,float16,float16,0,3.8302666346232095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,1,float16,fp8,0,3.7635412216186523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,1,fp8,fp8,0,3.322538693745931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,1,float16,float16,0,3.7582613627115884
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,1,float16,fp8,0,3.865088144938151
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,1,fp8,fp8,0,3.3834667205810547
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,1,float16,float16,0,1.9739306767781575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,1,float16,float16,0,1.9000320434570312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,1,float16,fp8,0,2.0145546595255532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,1,float16,fp8,0,1.8723840713500977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,1,fp8,fp8,0,1.6875519752502441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,1,fp8,fp8,0,1.7904586791992188
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,1,float16,float16,0,1.9075360298156738
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,1,float16,fp8,0,1.8809173901875813
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,1,fp8,fp8,0,1.693354606628418
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,1,float16,float16,0,1.9430452982584636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,1,float16,fp8,0,1.9362133344014485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,1,fp8,fp8,0,1.7071839968363445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,1,float16,float16,0,2.015573342641195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,1,float16,fp8,0,1.977344036102295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,1,float16,float16,0,1.067519982655843
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,1,fp8,fp8,0,1.7358506520589192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,1,float16,float16,0,1.023146629333496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,1,float16,fp8,0,1.0596693356831868
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,1,fp8,fp8,0,0.9535146554311117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,1,float16,fp8,0,1.0303146839141846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,1,fp8,fp8,0,0.9007786909739176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,1,float16,float16,0,1.006767988204956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,1,float16,fp8,0,1.0337279637654622
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,1,fp8,fp8,0,0.9089706738789877
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,1,float16,float16,0,1.0333866278330486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,1,float16,fp8,0,1.0501226584116619
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,1,fp8,fp8,0,0.9127253691355387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,1,float16,float16,0,1.0531893571217854
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,1,float16,fp8,0,1.03765869140625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,1,fp8,fp8,0,0.9245013395945231
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,1,float16,float16,0,0.6021120150883993
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,1,float16,fp8,0,0.6072320143381754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,1,float16,float16,0,0.5707093477249146
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,1,float16,fp8,0,0.5771893262863159
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,1,fp8,fp8,0,0.5091040134429932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,1,fp8,fp8,0,0.53111465771993
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,1,float16,float16,0,0.5751466751098633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,1,float16,fp8,0,0.5826559861501058
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,1,fp8,fp8,0,0.5108053286870321
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,1,float16,float16,0,0.575488011042277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,1,float16,fp8,0,0.582314650217692
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,1,fp8,fp8,0,0.5131946802139282
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,1,float16,float16,0,0.5898240009943644
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,1,float16,fp8,0,0.585045337677002
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,1,float16,float16,0,0.3595946629842122
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,1,fp8,fp8,0,0.5186560153961182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,1,float16,float16,0,0.3449173370997111
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,1,float16,fp8,0,0.36369065443674725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,1,float16,fp8,0,0.3432106574376424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,1,fp8,fp8,0,0.3227253357569377
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,1,fp8,fp8,0,0.307370662689209
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,1,float16,float16,0,0.3445653518040975
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,1,float16,fp8,0,0.3438933293024699
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,1,fp8,fp8,0,0.30770667394002277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,1,float16,float16,0,0.3473066488901774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,1,float16,fp8,0,0.3479893207550049
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,1,fp8,fp8,0,0.31112533807754517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,1,float16,float16,0,0.34697067737579346
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,1,float16,fp8,0,0.3541333278020223
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,1,fp8,fp8,0,0.3189760049184163
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,1,float16,float16,0,2.285226662953695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,1,float16,fp8,0,2.2533119519551597
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,1,fp8,fp8,0,2.0538026491800943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,1,float16,float16,0,2.3744853337605796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,1,float16,fp8,0,2.2958079973856607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,1,fp8,fp8,0,2.064725399017334
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,1,float16,float16,0,2.337290604909261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,1,float16,fp8,0,2.3700586954752603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,1,fp8,fp8,0,2.089301268259684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,1,float16,float16,0,2.3280800183614097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,1,float16,fp8,0,2.3908692995707193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,1,fp8,fp8,0,2.131626605987549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,1,float16,float16,0,1.2948479652404785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,1,float16,float16,0,1.187503973642985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,1,float16,fp8,0,1.288709322611491
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,1,float16,fp8,0,1.2195839881896973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,1,fp8,fp8,0,1.077071984608968
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,1,fp8,fp8,0,1.1518293221791585
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,1,float16,float16,0,1.1892053286234539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,1,float16,fp8,0,1.2107093334197998
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,1,fp8,fp8,0,1.0794719854990642
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,1,float16,float16,0,1.2489386399586995
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,1,float16,fp8,0,1.1861333052317302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,1,fp8,fp8,0,1.0856160322825115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,1,float16,float16,0,1.2182133197784424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,1,float16,fp8,0,1.2593493461608887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,1,float16,float16,0,0.6925653616587321
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,1,fp8,fp8,0,1.1047253608703613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,1,float16,float16,0,0.6562133232752482
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,1,float16,fp8,0,0.6500693162282308
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,1,float16,fp8,0,0.7045119603474935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,1,fp8,fp8,0,0.6191786527633667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,1,fp8,fp8,0,0.5799253384272257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,1,float16,float16,0,0.6538293361663818
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,1,float16,fp8,0,0.6604799826939901
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,1,fp8,fp8,0,0.5847040017445883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,1,float16,float16,0,0.6632053454717001
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,1,float16,fp8,0,0.6570666631062826
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,1,fp8,fp8,0,0.5905013481775919
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,1,float16,float16,0,0.6809600194295248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,1,float16,float16,0,0.3940746784210205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,1,float16,fp8,0,0.66594131787618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,1,fp8,fp8,0,0.5983573198318481
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,1,float16,fp8,0,0.3979946772257487
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,1,float16,float16,0,0.3694933255513509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,1,float16,fp8,0,0.36743998527526855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,1,fp8,fp8,0,0.35583468278249103
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,1,fp8,fp8,0,0.33433600266774494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,1,float16,float16,0,0.37084798018137616
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,1,float16,fp8,0,0.3688053290049235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,1,fp8,fp8,0,0.3380906581878662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,1,float16,float16,0,0.3797333240509033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,1,float16,fp8,0,0.377344012260437
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,1,fp8,fp8,0,0.33843199412027997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,1,float16,float16,0,0.38417065143585205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,1,float16,fp8,0,0.38758401075998944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,1,float16,float16,0,0.2379093368848165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,1,fp8,fp8,0,0.3442346652348836
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,1,float16,fp8,0,0.24200532833735147
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,1,fp8,fp8,0,0.22118399540583292
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,1,float16,float16,0,0.22698666652043661
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,1,float16,fp8,0,0.22835199038187662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,1,fp8,fp8,0,0.21127466360727945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,1,float16,float16,0,0.22971733411153158
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,1,float16,fp8,0,0.22732800245285034
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,1,fp8,fp8,0,0.20753065745035806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,1,float16,float16,0,0.2300586700439453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,1,float16,fp8,0,0.23073599735895792
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,1,fp8,fp8,0,0.20906666914621988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,1,float16,float16,0,0.2307413419087728
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,1,float16,fp8,0,0.23483733336130777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,1,fp8,fp8,0,0.21401600042978922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,1,float16,float16,0,2.3546880086263022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,1,float16,fp8,0,2.3727787335713706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,1,fp8,fp8,0,2.2016053199768066
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,1,float16,float16,0,2.405205408732096
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,1,float16,fp8,0,2.3659520149230957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,1,fp8,fp8,0,2.227882703145345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,1,float16,float16,0,2.405893325805664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,1,float16,fp8,0,2.423626740773519
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,1,fp8,fp8,0,2.243072032928467
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,1,float16,float16,0,2.466474692026774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,1,float16,fp8,0,2.5304746627807617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,1,fp8,fp8,0,2.2959787050882974
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,1,float16,float16,0,1.3233493169148762
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,1,float16,float16,0,1.2516693274180095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,1,float16,fp8,0,1.328810691833496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,1,float16,fp8,0,1.2243680159250896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,1,fp8,fp8,0,1.2263946533203125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,1,fp8,fp8,0,1.1272532939910889
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,1,float16,float16,0,1.2151467005411785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,1,float16,fp8,0,1.2328906853993733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,1,fp8,fp8,0,1.132031997044881
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,1,float16,float16,0,1.242965300877889
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,1,float16,fp8,0,1.2284693717956543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,1,fp8,fp8,0,1.1439786752065022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,1,float16,float16,0,1.2904053529103596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,1,float16,fp8,0,1.295530637105306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,1,fp8,fp8,0,1.1801653703053792
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,1,float16,float16,0,0.6908586819966634
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,1,float16,fp8,0,0.7157759666442871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,1,float16,float16,0,0.65774933497111
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,1,float16,fp8,0,0.6507519880930582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,1,fp8,fp8,0,0.6483626763025919
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,1,fp8,fp8,0,0.5982933441797892
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,1,float16,float16,0,0.6594560146331787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,1,float16,fp8,0,0.652789314587911
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,1,fp8,fp8,0,0.6017706791559855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,1,float16,float16,0,0.6669600009918213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,1,float16,fp8,0,0.6686720053354899
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,1,fp8,fp8,0,0.6062026818593343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,1,float16,float16,0,0.6850559711456299
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,1,float16,float16,0,0.381440003712972
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,1,float16,fp8,0,0.6860799789428711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,1,fp8,fp8,0,0.6222506761550903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,1,float16,fp8,0,0.39236267407735187
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,1,float16,float16,0,0.36027733484903973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,1,float16,fp8,0,0.36061867078145343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,1,fp8,fp8,0,0.33262399832407635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,1,fp8,fp8,0,0.35652267932891846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,1,float16,float16,0,0.3619840145111084
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,1,float16,fp8,0,0.36369065443674725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,1,fp8,fp8,0,0.3326293428738912
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,1,float16,float16,0,0.36403199036916095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,1,float16,fp8,0,0.36983466148376465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,1,fp8,fp8,0,0.3394560019175212
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,1,float16,float16,0,0.37597866853078205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,1,float16,fp8,0,0.3739306529362996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,1,float16,float16,0,0.2259626587231954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,1,fp8,fp8,0,0.34628268082936603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,1,float16,float16,0,0.21009065707524618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,1,float16,fp8,0,0.2300586700439453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,1,fp8,fp8,0,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,1,float16,fp8,0,0.21368533372879028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,1,fp8,fp8,0,0.19200533628463745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,1,float16,float16,0,0.21401600042978922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,1,float16,fp8,0,0.20974934101104736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,1,fp8,fp8,0,0.19643733898798624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,1,float16,float16,0,0.2126506765683492
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,1,float16,fp8,0,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,1,fp8,fp8,0,0.1991680065790812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,1,float16,float16,0,0.21912533044815063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,1,float16,float16,0,0.14250666896502176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,1,float16,fp8,0,0.2198186715443929
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,1,fp8,fp8,0,0.20377600193023682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,1,float16,fp8,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,1,float16,float16,0,0.13875200351079306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,1,float16,fp8,0,0.14079999923706055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,1,fp8,fp8,0,0.13329066832860312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,1,fp8,fp8,0,0.13738666971524557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,1,float16,float16,0,0.13943466544151306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,1,float16,fp8,0,0.14113600055376688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,1,fp8,fp8,0,0.13193600376447043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,1,float16,float16,0,0.13943466544151306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,1,float16,fp8,0,0.14064000050226846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,1,fp8,fp8,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,1,float16,float16,0,0.1431839962800344
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,1,float16,fp8,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,1,fp8,fp8,0,0.13260799646377563
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,1,float16,float16,0,1.555466651916504
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,1,float16,fp8,0,1.5558080673217773
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,1,fp8,fp8,0,1.4851412773132324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,1,float16,float16,0,1.5870292981465657
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,1,float16,fp8,0,1.5856639544169109
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,1,fp8,fp8,0,1.500501314798991
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,1,float16,float16,0,1.593173344930013
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,1,float16,fp8,0,1.5948905944824219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,1,fp8,fp8,0,1.5230293273925781
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,1,float16,float16,0,1.628335952758789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,1,float16,fp8,0,1.6249173482259114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,1,fp8,fp8,0,1.5735467274983723
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,1,float16,float16,0,0.8932693004608154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,1,float16,float16,0,0.8179519971211752
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,1,float16,fp8,0,0.9004267056783041
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,1,float16,fp8,0,0.8227787017822266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,1,fp8,fp8,0,0.8400266965230306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,1,fp8,fp8,0,0.7676533063252767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,1,float16,float16,0,0.8231306870778402
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,1,float16,fp8,0,0.8268746534983317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,1,fp8,fp8,0,0.7717546621958414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,1,float16,float16,0,0.8306346734364828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,1,float16,fp8,0,0.8193706671396891
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,1,fp8,fp8,0,0.7826773325602213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,1,float16,float16,0,0.8447999954223633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,1,float16,fp8,0,0.8567466735839844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,1,float16,float16,0,0.4739413261413574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,1,fp8,fp8,0,0.8060586452484131
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,1,float16,fp8,0,0.47598934173583984
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,1,float16,float16,0,0.44014934698740643
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,1,float16,fp8,0,0.44049068291982013
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,1,fp8,fp8,0,0.4490240017573039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,1,fp8,fp8,0,0.4092586835225423
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,1,float16,float16,0,0.44253865877787274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,1,float16,fp8,0,0.443557341893514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,1,fp8,fp8,0,0.4150559902191162
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,1,float16,float16,0,0.44253865877787274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,1,float16,fp8,0,0.44868799050649005
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,1,fp8,fp8,0,0.41915734608968097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,1,float16,float16,0,0.45653335253397626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,1,float16,fp8,0,0.45631468296051025
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,1,float16,float16,0,0.26687999566396076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,1,fp8,fp8,0,0.4295733372370402
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,1,float16,fp8,0,0.2688000003496806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,1,float16,float16,0,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,1,float16,fp8,0,0.24371200799942017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,1,fp8,fp8,0,0.22805867592493692
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,1,fp8,fp8,0,0.25088000297546387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,1,float16,float16,0,0.2450773318608602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,1,float16,fp8,0,0.2450773318608602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,1,fp8,fp8,0,0.2327786684036255
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,1,float16,float16,0,0.2474720080693563
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,1,float16,fp8,0,0.2515679995218913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,1,fp8,fp8,0,0.23586666584014893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,1,float16,float16,0,0.2542933424313863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,1,float16,fp8,0,0.2553226749102275
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,1,fp8,fp8,0,0.2416693369547526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,1,float16,float16,0,0.15615466237068176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,1,float16,fp8,0,0.1585493286450704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,1,fp8,fp8,0,0.15069866180419922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,1,float16,float16,0,0.14660267035166422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,1,float16,fp8,0,0.14574933052062988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,1,fp8,fp8,0,0.13431466619173685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,1,float16,float16,0,0.1443839967250824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,1,float16,fp8,0,0.14642666776974997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,1,fp8,fp8,0,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,1,float16,float16,0,0.14524799585342407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,1,float16,fp8,0,0.1469439963499705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,1,fp8,fp8,0,0.1384106675783793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,1,float16,float16,0,0.14728533228238425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,1,float16,fp8,0,0.14830933014551798
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,1,fp8,fp8,0,0.1430186629295349
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,1,float16,float16,0,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,1,float16,fp8,0,0.1013759970664978
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,1,float16,fp8,0,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,1,fp8,fp8,0,0.09249599774678548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,1,fp8,fp8,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,1,float16,float16,0,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,1,float16,fp8,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,1,fp8,fp8,0,0.09180800120035808
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,1,float16,float16,0,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,1,float16,fp8,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,1,fp8,fp8,0,0.0942080020904541
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,1,float16,float16,0,1.8006985982259114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,1,float16,fp8,0,1.8020693461100261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,1,fp8,fp8,0,1.7682773272196453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,1,float16,float16,0,1.8044586181640625
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,1,float16,fp8,0,1.8143572807312012
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,1,fp8,fp8,0,1.7860266367594402
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,1,float16,float16,0,1.8421759605407715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,1,float16,fp8,0,1.8348053296407063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,1,fp8,fp8,0,1.8266453742980957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,1,float16,float16,0,1.8904746373494465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,1,float16,fp8,0,1.911296049753825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,1,fp8,fp8,0,1.8807466824849446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,1,float16,float16,0,0.9972053368886312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,1,float16,float16,0,0.9147733052571615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,1,float16,fp8,0,1.0031253496805828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,1,float16,fp8,0,0.919381300608317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,1,fp8,fp8,0,0.9992533524831136
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,1,fp8,fp8,0,0.9014613628387451
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,1,float16,float16,0,0.9248426755269369
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,1,float16,fp8,0,0.919381300608317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,1,fp8,fp8,0,0.9093173344930013
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,1,float16,float16,0,0.9255253473917643
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,1,float16,fp8,0,0.9405439694722494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,1,fp8,fp8,0,0.924511988957723
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,1,float16,float16,0,0.9658026695251465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,1,float16,fp8,0,0.9658079942067465
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,1,float16,float16,0,0.5217333237330118
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,1,fp8,fp8,0,0.9579520225524902
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,1,float16,fp8,0,0.5283840099970499
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,1,float16,float16,0,0.4797439972559611
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,1,float16,fp8,0,0.4828159809112549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,1,fp8,fp8,0,0.5186560153961182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,1,fp8,fp8,0,0.469157338142395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,1,float16,float16,0,0.48110934098561603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,1,float16,fp8,0,0.4852000077565511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,1,fp8,fp8,0,0.4729173183441162
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,1,float16,float16,0,0.48930132389068604
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,1,float16,fp8,0,0.4910026788711548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,1,fp8,fp8,0,0.48213334878285724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,1,float16,float16,0,0.5005653301874796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,1,float16,float16,0,0.2845013340314229
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,1,float16,fp8,0,0.5073920090993246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,1,fp8,fp8,0,0.49715201059977215
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,1,float16,fp8,0,0.28484266996383667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,1,float16,float16,0,0.25736000140508014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,1,float16,fp8,0,0.25702399015426636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,1,fp8,fp8,0,0.25361599524815875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,1,fp8,fp8,0,0.2797226707140605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,1,float16,float16,0,0.2590773304303487
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,1,float16,fp8,0,0.2611146569252014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,1,fp8,fp8,0,0.2549706697463989
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,1,float16,float16,0,0.26573334137598675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,1,float16,fp8,0,0.26641066869099933
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,1,fp8,fp8,0,0.258730669816335
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,1,float16,float16,0,0.26948267221450806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,1,float16,fp8,0,0.2769920031229655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,1,float16,float16,0,0.15898133317629495
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,1,fp8,fp8,0,0.2667520046234131
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,1,float16,float16,0,0.14285332957903543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,1,float16,fp8,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,1,float16,fp8,0,0.1609386702378591
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,1,fp8,fp8,0,0.15956800182660422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,1,fp8,fp8,0,0.13805866241455078
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,1,float16,float16,0,0.14335999886194864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,1,float16,fp8,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,1,fp8,fp8,0,0.13993066549301147
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,1,float16,float16,0,0.14523733655611673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,1,float16,fp8,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,1,fp8,fp8,0,0.1454080045223236
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,1,float16,float16,0,0.15241600076357523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,1,float16,fp8,0,0.1527466674645742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,1,float16,float16,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,1,fp8,fp8,0,0.15240533153216043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,1,float16,fp8,0,0.09829866886138916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,1,float16,float16,0,0.08919466535250346
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,1,float16,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,1,fp8,fp8,0,0.09557867050170898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,1,fp8,fp8,0,0.0890826682249705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,1,float16,float16,0,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,1,float16,fp8,0,0.0897653301556905
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,1,fp8,fp8,0,0.08738666772842407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,1,float16,float16,0,0.09215999643007915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,1,float16,fp8,0,0.09147199988365173
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,1,fp8,fp8,0,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,1,float16,float16,0,0.0911253293355306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,1,float16,fp8,0,0.09523733456929524
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,1,fp8,fp8,0,0.09045867125193278
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,1,float16,float16,0,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,1,float16,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,1,fp8,fp8,0,0.05939733485380808
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,1,float16,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,1,fp8,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,1,float16,fp8,0,0.0635040005048116
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,1,fp8,fp8,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,1,float16,fp8,0,0.06177066763242086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,1,fp8,fp8,0,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,1,float16,float16,0,1.2595199743906658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,1,float16,fp8,0,1.2685706615447998
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,1,fp8,fp8,0,1.2654933134714763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,1,float16,float16,0,1.2750506401062012
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,1,float16,fp8,0,1.267194668451945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,1,fp8,fp8,0,1.2798293431599934
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,1,float16,float16,0,1.291434685389201
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,1,float16,fp8,0,1.295530637105306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,1,fp8,fp8,0,1.2951893011728923
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,1,float16,float16,0,1.3387093544006348
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,1,float16,fp8,0,1.3410986264546711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,1,fp8,fp8,0,1.3496267000834148
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,1,float16,float16,0,0.7038293679555258
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,1,float16,float16,0,0.646997332572937
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,1,float16,fp8,0,0.7164586385091146
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,1,float16,fp8,0,0.644266684850057
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,1,fp8,fp8,0,0.6521173318227133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,1,fp8,fp8,0,0.7273813088734945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,1,float16,float16,0,0.652453343073527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,1,float16,fp8,0,0.6497333447138468
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,1,fp8,fp8,0,0.6587786674499512
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,1,float16,float16,0,0.659114678700765
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,1,float16,fp8,0,0.6608213186264038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,1,fp8,fp8,0,0.6666239897410074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,1,float16,float16,0,0.6789173285166422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,1,float16,fp8,0,0.6812907059987386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,1,float16,float16,0,0.3715360164642334
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,1,fp8,fp8,0,0.6881279945373535
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,1,float16,fp8,0,0.38042132059733075
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,1,fp8,fp8,0,0.38076265652974445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,1,float16,float16,0,0.3394560019175212
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,1,float16,fp8,0,0.34116268157958984
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,1,fp8,fp8,0,0.3452586730321248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,1,float16,float16,0,0.3432106574376424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,1,float16,fp8,0,0.3428533474604289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,1,fp8,fp8,0,0.3466240167617798
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,1,float16,float16,0,0.3500373363494873
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,1,float16,fp8,0,0.35072000821431476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,1,fp8,fp8,0,0.3500373363494873
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,1,float16,float16,0,0.3575466473897298
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,1,float16,fp8,0,0.36061867078145343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,1,float16,float16,0,0.20514132579167685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,1,fp8,fp8,0,0.3609600067138672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,1,float16,fp8,0,0.20940800507863364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,1,float16,float16,0,0.18278400103251138
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,1,float16,fp8,0,0.1848319967587789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,1,fp8,fp8,0,0.20974934101104736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,1,fp8,fp8,0,0.1879040002822876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,1,float16,float16,0,0.1848319967587789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,1,float16,fp8,0,0.18568533658981323
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,1,fp8,fp8,0,0.19131733973821005
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,1,float16,float16,0,0.18961066007614136
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,1,float16,fp8,0,0.1904639999071757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,1,fp8,fp8,0,0.1930239995320638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,1,float16,float16,0,0.19745065768559775
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,1,float16,fp8,0,0.19780266284942627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,1,fp8,fp8,0,0.1986560026804606
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,1,float16,float16,0,0.11776000261306763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,1,float16,fp8,0,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,1,fp8,fp8,0,0.12219732999801636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,1,float16,float16,0,0.1058186690012614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,1,float16,fp8,0,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,1,fp8,fp8,0,0.1058240036169688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,1,float16,float16,0,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,1,float16,fp8,0,0.10853866736094157
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,1,fp8,fp8,0,0.105813334385554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,1,float16,float16,0,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,1,float16,fp8,0,0.10956799983978271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,1,fp8,fp8,0,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,1,float16,float16,0,0.11058666308720906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,1,float16,fp8,0,0.11502400040626526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,1,float16,float16,0,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,1,float16,fp8,0,0.0747573326031367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,1,float16,float16,0,0.07065600156784058
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,1,fp8,fp8,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,1,float16,fp8,0,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,1,float16,float16,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,1,float16,fp8,0,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,1,float16,float16,0,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,1,float16,fp8,0,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,1,fp8,fp8,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,1,float16,float16,0,0.07237333556016286
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,1,float16,fp8,0,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,1,float16,float16,0,0.0525546669960022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,1,fp8,fp8,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,1,float16,fp8,0,0.0532533327738444
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,1,float16,fp8,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,1,fp8,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,1,float16,float16,0,0.051685333251953125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,1,fp8,fp8,0,0.05050666630268097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,1,float16,fp8,0,0.052570665876070656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,1,float16,float16,0,1.412432034810384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,1,float16,fp8,0,1.4062933921813965
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,1,fp8,fp8,0,1.475920041402181
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,1,float16,float16,0,1.4216532707214355
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,1,float16,fp8,0,1.4172159830729167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,1,fp8,fp8,0,1.511082649230957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,1,float16,float16,0,1.4637974103291829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,1,float16,fp8,0,1.4457119305928547
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,1,fp8,fp8,0,1.7140053113301594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,1,float16,float16,0,1.472511927286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,1,float16,fp8,0,1.4549387296040852
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,1,fp8,fp8,0,1.6809013684590657
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,1,float16,float16,0,0.8050293127695719
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,1,float16,float16,0,0.7174826463063558
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,1,float16,fp8,0,0.7906880378723145
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,1,float16,fp8,0,0.717146635055542
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,1,fp8,fp8,0,0.7485439777374268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,1,fp8,fp8,0,0.8434346516927084
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,1,float16,float16,0,0.724992036819458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,1,float16,fp8,0,0.722266674041748
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,1,fp8,fp8,0,0.7659520308176676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,1,float16,float16,0,0.7427413463592529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,1,float16,fp8,0,0.7403519948323568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,1,fp8,fp8,0,0.854693333307902
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,1,float16,float16,0,0.7505919933319092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,1,float16,fp8,0,0.7410399913787842
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,1,float16,float16,0,0.40993599096934
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,1,fp8,fp8,0,0.8354132970174154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,1,float16,fp8,0,0.40243732929229736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,1,float16,float16,0,0.3729066848754883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,1,fp8,fp8,0,0.4271786610285441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,1,float16,fp8,0,0.37085334459940594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,1,fp8,fp8,0,0.38519465923309326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,1,float16,float16,0,0.37426666418711346
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,1,float16,fp8,0,0.37085866928100586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,1,fp8,fp8,0,0.3956000010172526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,1,float16,float16,0,0.38417065143585205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,1,float16,fp8,0,0.3824640115102132
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,1,fp8,fp8,0,0.4227413336435954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,1,float16,float16,0,0.39031465848286945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,1,float16,fp8,0,0.3838293155034383
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,1,float16,float16,0,0.21538132429122925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,1,fp8,fp8,0,0.42854400475819904
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,1,float16,fp8,0,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,1,float16,float16,0,0.19541333119074503
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,1,float16,fp8,0,0.19746132691701254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,1,fp8,fp8,0,0.20206934213638306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,1,fp8,fp8,0,0.22562134265899658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,1,float16,float16,0,0.19882132609685263
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,1,float16,fp8,0,0.19950934251149496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,1,fp8,fp8,0,0.20872533321380615
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,1,float16,float16,0,0.2044586737950643
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,1,float16,fp8,0,0.20411733786265054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,1,fp8,fp8,0,0.2194719910621643
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,1,float16,float16,0,0.20906132459640503
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,1,float16,fp8,0,0.20428800582885742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,1,fp8,fp8,0,0.21606399615605673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,1,float16,float16,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,1,float16,fp8,0,0.11980799833933513
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,1,fp8,fp8,0,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,1,float16,float16,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,1,float16,fp8,0,0.10853866736094157
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,1,fp8,fp8,0,0.11059199770291646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,1,float16,float16,0,0.11025066177050273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,1,float16,fp8,0,0.10785599549611409
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,1,fp8,fp8,0,0.11263466874758403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,1,float16,float16,0,0.11125866572062175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,1,float16,fp8,0,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,1,fp8,fp8,0,0.1181066632270813
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,1,float16,float16,0,0.11639466881752014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,1,float16,fp8,0,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,1,float16,float16,0,0.06758933266003926
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,1,fp8,fp8,0,0.11878400047620137
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,1,float16,fp8,0,0.06725333134333293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,1,float16,float16,0,0.06451733410358429
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,1,fp8,fp8,0,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,1,fp8,fp8,0,0.06451733410358429
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,1,float16,float16,0,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,1,fp8,fp8,0,0.06314133107662201
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,1,float16,float16,0,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,1,float16,fp8,0,0.06520533561706543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,1,fp8,fp8,0,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,1,float16,float16,0,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,1,float16,fp8,0,0.06689066688219707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,1,fp8,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,1,float16,float16,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,1,float16,fp8,0,0.03926933308442434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,1,float16,float16,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,1,float16,fp8,0,0.041290665666262306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,1,float16,fp8,0,0.03309333324432373
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,1,float16,fp8,0,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,1,float16,float16,0,1.2380159695943196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,1,float16,fp8,0,1.2219733397165935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,1,fp8,fp8,0,1.3226559956868489
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,1,float16,float16,0,1.2421120007832844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,1,float16,fp8,0,1.2356266975402832
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,1,fp8,fp8,0,1.355434735616048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,1,float16,float16,0,1.2958719730377197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,1,float16,fp8,0,1.2852906386057537
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,1,fp8,fp8,0,1.5557972590128581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,1,float16,float16,0,1.290069341659546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,1,float16,fp8,0,1.279146671295166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,1,fp8,fp8,0,1.5380533536275227
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,1,float16,float16,0,0.7147573630015055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,1,float16,float16,0,0.6304426590601603
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,1,float16,fp8,0,0.6993973255157471
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,1,float16,fp8,0,0.6256373325983683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,1,fp8,fp8,0,0.6731093724568685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,1,fp8,fp8,0,0.769706646601359
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,1,float16,float16,0,0.6306133270263672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,1,float16,fp8,0,0.63045867284139
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,1,fp8,fp8,0,0.6929066975911459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,1,float16,float16,0,0.6514346599578857
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,1,float16,fp8,0,0.6473386685053507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,1,fp8,fp8,0,0.7758506933848063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,1,float16,float16,0,0.6584320068359375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,1,float16,fp8,0,0.6483626763025919
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,1,float16,float16,0,0.3653973340988159
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,1,fp8,fp8,0,0.7389866511027018
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,1,float16,fp8,0,0.3568640152613322
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,1,float16,float16,0,0.32443734010060626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,1,fp8,fp8,0,0.38758401075998944
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,1,float16,fp8,0,0.32238932450612384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,1,fp8,fp8,0,0.34833065668741864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,1,float16,float16,0,0.328874667485555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,1,float16,fp8,0,0.3251199920972188
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,1,fp8,fp8,0,0.3537919918696086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,1,float16,float16,0,0.33673067887624103
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,1,float16,fp8,0,0.33604268232981366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,1,fp8,fp8,0,0.3831466833750407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,1,float16,float16,0,0.3428693215052287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,1,float16,fp8,0,0.33672531445821124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,1,float16,float16,0,0.19165333112080893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,1,fp8,fp8,0,0.3821386496225993
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,1,float16,fp8,0,0.1909760038057963
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,1,float16,float16,0,0.1723733345667521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,1,float16,fp8,0,0.17271467049916586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,1,fp8,fp8,0,0.20497065782546997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,1,fp8,fp8,0,0.18308266003926596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,1,float16,float16,0,0.17356799046198526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,1,float16,fp8,0,0.17289066314697266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,1,fp8,fp8,0,0.18619734048843384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,1,float16,float16,0,0.1800533334414164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,1,float16,fp8,0,0.17698132991790771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,1,fp8,fp8,0,0.19883199532826742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,1,float16,float16,0,0.18261333306630453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,1,float16,fp8,0,0.1800533334414164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,1,float16,float16,0,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,1,fp8,fp8,0,0.19404266277949014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,1,float16,fp8,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,1,float16,float16,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,1,fp8,fp8,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,1,float16,fp8,0,0.09385599692662557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,1,fp8,fp8,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,1,float16,float16,0,0.09659199913342793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,1,float16,fp8,0,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,1,fp8,fp8,0,0.10035199920336406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,1,float16,float16,0,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,1,float16,fp8,0,0.0986400047938029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,1,fp8,fp8,0,0.106495996316274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,1,float16,float16,0,0.1013759970664978
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,1,float16,fp8,0,0.10034666458765666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,1,float16,float16,0,0.058362667759259544
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,1,fp8,fp8,0,0.11161599556605022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,1,float16,fp8,0,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,1,fp8,fp8,0,0.0631520003080368
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,1,float16,float16,0,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,1,fp8,fp8,0,0.056314667065938316
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,1,float16,float16,0,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,1,fp8,fp8,0,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,1,float16,float16,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,1,float16,float16,0,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,1,fp8,fp8,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,1,fp8,fp8,0,0.03857066730658213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,1,float16,fp8,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,1,fp8,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,1,float16,float16,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,1,fp8,fp8,0,0.028677334388097126
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,1,float16,float16,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,1,fp8,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,1,float16,fp8,0,0.45892266432444256
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,1,float16,float16,0,0.4647253354390462
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,1,fp8,fp8,0,0.493397315343221
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,1,float16,fp8,0,0.47018134593963623
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,1,fp8,fp8,0,0.5148906707763672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,1,float16,float16,0,0.5009066661198934
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,1,float16,fp8,0,0.4940799872080485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,1,fp8,fp8,0,0.5986986557642618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,1,float16,float16,0,0.5063626766204834
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,1,float16,fp8,0,0.4957919915517171
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,1,fp8,fp8,0,0.5963146686553955
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,1,float16,float16,0,0.285866657892863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,1,float16,fp8,0,0.2797279953956604
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,1,float16,float16,0,0.24644267559051514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,1,fp8,fp8,0,0.30156266689300537
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,1,float16,fp8,0,0.24371200799942017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,1,fp8,fp8,0,0.25702399015426636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,1,float16,float16,0,0.25306665897369385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,1,float16,fp8,0,0.2481493353843689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,1,fp8,fp8,0,0.2701653242111206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,1,float16,float16,0,0.26180267333984375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,1,float16,fp8,0,0.2611200014750163
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,1,fp8,fp8,0,0.2974720001220703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,1,float16,float16,0,0.26641066869099933
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,1,float16,fp8,0,0.2611146569252014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,1,fp8,fp8,0,0.2950826684633891
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,1,float16,float16,0,0.1565013329188029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,1,float16,fp8,0,0.15478400389353433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,1,fp8,fp8,0,0.16401066382726034
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,1,float16,float16,0,0.13294399778048197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,1,float16,fp8,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,1,fp8,fp8,0,0.14250666896502176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,1,float16,float16,0,0.1353333294391632
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,1,float16,fp8,0,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,1,fp8,fp8,0,0.14523733655611673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,1,float16,float16,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,1,float16,fp8,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,1,fp8,fp8,0,0.1570133368174235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,1,float16,float16,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,1,float16,fp8,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,1,fp8,fp8,0,0.1539413332939148
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,1,float16,float16,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,1,float16,fp8,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,1,fp8,fp8,0,0.09557333588600159
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,1,float16,float16,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,1,float16,fp8,0,0.07612266639868419
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,1,float16,float16,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,1,float16,fp8,0,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,1,float16,float16,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,1,fp8,fp8,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,1,float16,float16,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,1,float16,fp8,0,0.08122666676839192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,1,fp8,fp8,0,0.05392533540725708
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,1,float16,float16,0,0.04641599953174591
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,1,fp8,fp8,0,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,1,float16,float16,0,0.04608533283074697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,1,float16,fp8,0,0.04710933566093445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,1,fp8,fp8,0,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,1,float16,float16,0,0.04880533119042715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,1,float16,float16,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,1,float16,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,1,float16,float16,0,0.024570666253566742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,1,fp8,fp8,0,0.025946666797002155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,1,float16,float16,0,0.47496533393859863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,1,fp8,fp8,0,0.02091199904680252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,1,float16,float16,0,0.0206986665725708
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,1,fp8,fp8,0,0.02070933332045873
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,1,float16,float16,0,0.020714666694402695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,1,fp8,fp8,0,0.020693333198626835
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,1,fp8,fp8,0,0.020703999946514767
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,1,float16,float16,0,0.25088000297546387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,1,float16,fp8,0,0.2501973311106364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,1,fp8,fp8,0,0.2688000003496806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,1,float16,float16,0,0.25736000140508014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,1,float16,fp8,0,0.25224532683690387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,1,fp8,fp8,0,0.27562665939331055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,1,float16,float16,0,0.2715253432591756
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,1,float16,fp8,0,0.26709334055582684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,1,fp8,fp8,0,0.29918400446573895
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,1,float16,float16,0,0.27529066801071167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,1,float16,fp8,0,0.2691413362820943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,1,fp8,fp8,0,0.30430400371551514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,1,float16,float16,0,0.15990933775901794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,1,float16,fp8,0,0.15547200043996176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,1,float16,float16,0,0.13517333070437113
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,1,fp8,fp8,0,0.16656532883644104
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,1,float16,fp8,0,0.13447999954223633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,1,fp8,fp8,0,0.1437013347943624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,1,float16,float16,0,0.1390880048274994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,1,float16,fp8,0,0.1372160017490387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,1,fp8,fp8,0,0.14643733700116476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,1,float16,float16,0,0.14711466431617737
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,1,float16,fp8,0,0.1442080040772756
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,1,fp8,fp8,0,0.15820800264676413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,1,float16,float16,0,0.14728533228238425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,1,float16,fp8,0,0.14660267035166422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,1,float16,float16,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,1,fp8,fp8,0,0.16196800271670023
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,1,float16,fp8,0,0.09250666697820027
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,1,fp8,fp8,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,1,float16,float16,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,1,float16,fp8,0,0.07612266639868419
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,1,float16,fp8,0,0.07612266639868419
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,1,fp8,fp8,0,0.0825973351796468
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,1,float16,float16,0,0.0825973351796468
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,1,fp8,fp8,0,0.08704533179601033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,1,float16,float16,0,0.08363733688990276
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,1,float16,fp8,0,0.0839573343594869
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,1,fp8,fp8,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,1,float16,float16,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,1,fp8,fp8,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,1,float16,fp8,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,1,fp8,fp8,0,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,1,fp8,fp8,0,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,1,float16,fp8,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,1,float16,fp8,0,0.03309333324432373
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,1,float16,float16,0,0.03139200061559677
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,1,fp8,fp8,0,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,1,float16,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,1,float16,float16,0,0.023120000958442688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,1,float16,fp8,0,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,1,fp8,fp8,0,0.019679999599854153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,1,float16,float16,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,1,float16,float16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,1,fp8,fp8,0,0.01764800027012825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,1,float16,fp8,0,0.018650667121013004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,1,fp8,fp8,0,0.017583999782800674
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,1,float16,float16,0,0.017477333545684814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,1,float16,fp8,0,0.01860800012946129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,1,float16,float16,0,0.018511999398469925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,1,float16,float16,0,0.17596266667048135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,1,float16,fp8,0,0.17629865805308023
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,1,fp8,fp8,0,0.1950719952583313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,1,float16,float16,0,0.1776640017827352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,1,float16,fp8,0,0.17595734198888144
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,1,fp8,fp8,0,0.19729065895080566
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,1,float16,float16,0,0.18312533696492514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,1,float16,fp8,0,0.18483734130859375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,1,fp8,fp8,0,0.20923733711242676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,1,float16,float16,0,0.18398400147755942
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,1,float16,fp8,0,0.18466132879257202
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,1,fp8,fp8,0,0.20906666914621988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,1,float16,float16,0,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,1,float16,fp8,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,1,fp8,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,1,float16,float16,0,0.09729066491127014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,1,float16,fp8,0,0.09693333506584167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,1,fp8,fp8,0,0.10478400190671285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,1,float16,float16,0,0.09727467099825542
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,1,fp8,fp8,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,1,float16,float16,0,0.1013866662979126
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,1,float16,fp8,0,0.10171733299891154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,1,fp8,fp8,0,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,1,float16,float16,0,0.10273599624633789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,1,fp8,fp8,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,1,float16,fp8,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,1,fp8,fp8,0,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,1,fp8,fp8,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,1,fp8,fp8,0,0.06144533554712931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,1,float16,float16,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,1,fp8,fp8,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,1,float16,float16,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,1,float16,fp8,0,0.03787733366092046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,1,float16,float16,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,1,float16,fp8,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,1,float16,fp8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,1,float16,float16,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,1,float16,float16,0,0.018325333793958027
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,1,fp8,fp8,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,1,float16,fp8,0,0.017978666971127193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,1,fp8,fp8,0,0.018511999398469925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,1,float16,float16,0,0.017984000345071156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,1,float16,float16,0,0.016842667013406754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,1,float16,float16,0,0.016634666671355564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,1,float16,float16,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,1,float16,float16,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,1,float16,fp8,0,0.1431893308957418
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,1,fp8,fp8,0,0.1616213321685791
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,1,float16,float16,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,1,float16,fp8,0,0.14421866337458292
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,1,fp8,fp8,0,0.16196266810099283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,1,float16,float16,0,0.14779733618100485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,1,float16,fp8,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,1,fp8,fp8,0,0.16810667514801025
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,1,float16,float16,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,1,float16,fp8,0,0.15001599987347922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,1,fp8,fp8,0,0.16844799121220908
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,1,float16,fp8,0,0.08430400490760803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,1,fp8,fp8,0,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,1,float16,fp8,0,0.07885333398977916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,1,fp8,fp8,0,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,1,float16,float16,0,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,1,float16,float16,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,1,float16,fp8,0,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,1,fp8,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,1,float16,float16,0,0.05120533208052317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,1,fp8,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,1,float16,fp8,0,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,1,fp8,fp8,0,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,1,fp8,fp8,0,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,1,float16,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,1,fp8,fp8,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,1,float16,float16,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,1,float16,fp8,0,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,1,float16,fp8,0,0.05052266518274943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,1,float16,float16,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,1,float16,float16,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,1,fp8,fp8,0,0.03446933378775915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,1,float16,fp8,0,0.023546665906906128
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,1,fp8,fp8,0,0.023898666103680927
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,1,float16,fp8,0,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,1,fp8,fp8,0,0.023557332654794056
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,1,float16,float16,0,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,1,float16,float16,0,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,1,float16,float16,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,1,float16,float16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,1,float16,fp8,0,0.018687999496857326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,1,float16,float16,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,1,float16,float16,0,0.016629333297411602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,1,float16,float16,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,1,float16,float16,0,0.01664000004529953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,1,float16,float16,0,0.12732266386349997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,1,float16,fp8,0,0.12596266468365988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,1,fp8,fp8,0,0.14079999923706055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,1,float16,float16,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,1,float16,fp8,0,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,1,fp8,fp8,0,0.14131200313568115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,1,float16,float16,0,0.12801067034403482
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,1,float16,fp8,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,1,fp8,fp8,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,1,float16,float16,0,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,1,float16,fp8,0,0.1283466617266337
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,1,fp8,fp8,0,0.14472533265749613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,1,float16,float16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,1,float16,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,1,fp8,fp8,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,1,float16,float16,0,0.07201600074768066
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,1,float16,fp8,0,0.07268799841403961
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,1,float16,float16,0,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,1,float16,fp8,0,0.07406400144100189
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,1,fp8,fp8,0,0.08020799855391185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,1,float16,float16,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,1,float16,float16,0,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,1,fp8,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,1,float16,float16,0,0.03071466585000356
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,1,float16,float16,0,0.021040000021457672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,1,float16,fp8,0,0.02182399978240331
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,1,float16,float16,0,0.022533332308133442
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,1,float16,float16,0,0.021082667013009388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,1,fp8,fp8,0,0.018426666657129925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,1,float16,fp8,0,0.01836799954374631
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,1,float16,float16,0,4.895061175028483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,1,float16,fp8,0,4.749829292297363
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,1,fp8,fp8,0,4.230485280354817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,1,float16,float16,0,4.969135920206706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,1,float16,fp8,0,4.8506879806518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,1,fp8,fp8,0,4.244821230570476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,1,float16,float16,0,5.197141329447429
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,1,float16,fp8,0,5.036629358927409
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,1,fp8,fp8,0,4.276565233866374
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,1,float16,float16,0,2.570751984914144
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,1,float16,fp8,0,2.7286399205525718
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,1,fp8,fp8,0,2.2707200050354004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,1,float16,float16,0,2.887338638305664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,1,float16,fp8,0,2.4987306594848633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,1,fp8,fp8,0,2.2050132751464844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,1,float16,float16,0,2.5048747062683105
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,1,float16,fp8,0,2.540032068888346
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,1,fp8,fp8,0,2.2084266344706216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,1,float16,float16,0,2.717184066772461
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,1,float16,fp8,0,2.5567572911580405
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,1,float16,float16,0,1.4450292587280273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,1,fp8,fp8,0,2.2261759440104165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,1,float16,fp8,0,1.4274560610453289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,1,fp8,fp8,0,1.2298239866892497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,1,float16,float16,0,1.3677226702372234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,1,float16,fp8,0,1.3588426907857258
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,1,fp8,fp8,0,1.1936426957448323
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,1,float16,float16,0,1.3598720232645671
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,1,float16,fp8,0,1.3991252581278484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,1,fp8,fp8,0,1.19432536760966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,1,float16,float16,0,1.394693374633789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,1,float16,float16,0,0.7876266638437907
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,1,float16,fp8,0,1.4247466723124187
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,1,fp8,fp8,0,1.2025279998779297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,1,float16,float16,0,0.7703893184661865
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,1,float16,fp8,0,0.800597349802653
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,1,float16,fp8,0,0.8104960123697916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,1,fp8,fp8,0,0.7120213508605957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,1,fp8,fp8,0,0.6881333192189535
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,1,float16,float16,0,0.8040106296539307
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,1,float16,fp8,0,0.7906986872355143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,1,fp8,fp8,0,0.6949653625488281
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,1,float16,float16,0,0.7982186476389567
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,1,float16,fp8,0,0.8084479967753092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,1,fp8,fp8,0,0.6939360300699869
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,1,float16,float16,0,2.9300107955932617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,1,float16,fp8,0,2.9392213821411133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,1,fp8,fp8,0,2.5540266036987305
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,1,float16,float16,0,2.8955307006835938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,1,float16,fp8,0,2.935119946797689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,1,fp8,fp8,0,2.5649654070536294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,1,float16,float16,0,2.987519900004069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,1,float16,fp8,0,2.919424057006836
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,1,fp8,fp8,0,2.589183966318766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,1,float16,float16,0,1.6351572672526042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,1,float16,fp8,0,1.65121062596639
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,1,fp8,fp8,0,1.4039039611816406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,1,float16,float16,0,1.5791786511739094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,1,float16,fp8,0,1.5400959650675456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,1,fp8,fp8,0,1.3509972890218098
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,1,float16,float16,0,1.5202986399332683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,1,float16,fp8,0,1.5586986541748047
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,1,fp8,fp8,0,1.349631945292155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,1,float16,float16,0,1.604101339975993
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,1,float16,fp8,0,1.578495979309082
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,1,float16,float16,0,0.889514684677124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,1,fp8,fp8,0,1.3714772860209148
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,1,float16,float16,0,0.8608427047729492
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,1,float16,fp8,0,0.9024853706359863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,1,float16,fp8,0,0.8393332958221436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,1,fp8,fp8,0,0.7738026777903239
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,1,fp8,fp8,0,0.7464959621429443
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,1,float16,float16,0,0.8751733303070068
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,1,float16,fp8,0,0.8686827023824056
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,1,fp8,fp8,0,0.7471840381622314
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,1,float16,float16,0,0.8710827032725016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,1,float16,fp8,0,0.8847359816233317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,1,fp8,fp8,0,0.7577599684397379
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,1,float16,float16,0,0.5166079998016357
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,1,float16,float16,0,0.5029546817143759
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,1,float16,fp8,0,0.5248106718063354
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,1,float16,fp8,0,0.5002239942550659
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,1,fp8,fp8,0,0.460970679918925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,1,fp8,fp8,0,0.4439093271891276
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,1,float16,float16,0,0.504309336344401
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,1,float16,fp8,0,0.5019306739171346
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,1,fp8,fp8,0,0.44697598616282147
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,1,float16,float16,0,0.5087573528289795
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,1,float16,fp8,0,0.5104639927546183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,1,fp8,fp8,0,0.4490240017573039
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,1,float16,float16,0,2.1678080558776855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,1,float16,fp8,0,2.130943934122721
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,1,fp8,fp8,0,1.87118927637736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,1,float16,float16,0,2.1090985933939614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,1,float16,fp8,0,2.1961386998494468
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,1,fp8,fp8,0,1.8780159950256348
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,1,float16,float16,0,2.151082674662272
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,1,float16,fp8,0,2.1275253295898438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,1,fp8,fp8,0,1.8979840278625488
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,1,float16,float16,0,1.1895466645558674
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,1,float16,fp8,0,1.2151467005411785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,1,fp8,fp8,0,1.0453333059946697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,1,float16,float16,0,1.13100798924764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,1,float16,fp8,0,1.1548960208892822
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,1,fp8,fp8,0,0.9989120165506998
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,1,float16,float16,0,1.1508053143819172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,1,float16,fp8,0,1.140229304631551
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,1,fp8,fp8,0,1.0088106791178386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,1,float16,float16,0,1.168554703394572
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,1,float16,float16,0,0.6737919648488363
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,1,float16,fp8,0,1.1402239799499512
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,1,fp8,fp8,0,1.0146026611328125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,1,float16,float16,0,0.6340266863505045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,1,float16,fp8,0,0.6480213403701782
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,1,float16,fp8,0,0.6690133412679037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,1,fp8,fp8,0,0.5867520173390707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,1,fp8,fp8,0,0.5611573457717896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,1,float16,float16,0,0.649727980295817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,1,float16,fp8,0,0.6456319888432821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,1,fp8,fp8,0,0.5658239920934042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,1,float16,float16,0,0.6432480017344157
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,1,float16,fp8,0,0.6587733427683512
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,1,fp8,fp8,0,0.5669653415679932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,1,float16,float16,0,0.3990186850229899
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,1,float16,float16,0,0.3821226755777995
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,1,float16,fp8,0,0.39765334129333496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,1,fp8,fp8,0,0.35447466373443604
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,1,float16,fp8,0,0.385535995165507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,1,fp8,fp8,0,0.3394560019175212
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,1,float16,float16,0,0.3797333240509033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,1,float16,fp8,0,0.3872426748275757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,1,fp8,fp8,0,0.3432106574376424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,1,float16,float16,0,0.3845173517862956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,1,float16,fp8,0,0.3831466833750407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,1,fp8,fp8,0,0.34594134489695233
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,1,float16,float16,0,2.8310187657674155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,1,float16,fp8,0,2.8241920471191406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,1,fp8,fp8,0,2.5012906392415366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,1,float16,float16,0,2.8663466771443686
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,1,float16,fp8,0,2.847402572631836
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,1,fp8,fp8,0,2.511360009511312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,1,float16,float16,0,2.8265813191731772
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,1,float16,fp8,0,2.9320478439331055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,1,float16,float16,0,1.5278080304463704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,1,fp8,fp8,0,2.5386667251586914
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,1,float16,fp8,0,1.5476053555806477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,1,fp8,fp8,0,1.3694346745808919
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,1,float16,float16,0,1.498794714609782
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,1,float16,fp8,0,1.4615893363952637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,1,fp8,fp8,0,1.2962079842885335
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,1,float16,float16,0,1.4769493738810222
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,1,float16,fp8,0,1.4639786084493
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,1,fp8,fp8,0,1.311578671137492
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,1,float16,float16,0,1.5018666585286458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,1,float16,fp8,0,1.539072036743164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,1,fp8,fp8,0,1.3189067045847576
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,1,float16,float16,0,0.8144213358561198
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,1,float16,fp8,0,0.8282399972279867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,1,float16,float16,0,0.8040106296539307
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,1,float16,fp8,0,0.8084479967753092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,1,fp8,fp8,0,0.740010658899943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,1,fp8,fp8,0,0.7007413705190023
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,1,float16,float16,0,0.808789332707723
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,1,float16,fp8,0,0.7958239714304606
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,1,fp8,fp8,0,0.7079253196716309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,1,float16,float16,0,0.8084479967753092
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,1,float16,fp8,0,0.8186879952748617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,1,fp8,fp8,0,0.7147520383199056
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,1,float16,float16,0,0.4756480058034261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,1,float16,float16,0,0.4534613291422526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,1,float16,fp8,0,0.47496533393859863
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,1,float16,fp8,0,0.45687464872996014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,1,fp8,fp8,0,0.4208693504333496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,1,fp8,fp8,0,0.40447998046875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,1,float16,float16,0,0.4514133135477702
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,1,float16,fp8,0,0.4585813283920288
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,1,fp8,fp8,0,0.4031146764755249
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,1,float16,float16,0,0.46199464797973633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,1,float16,fp8,0,0.45823999245961505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,1,fp8,fp8,0,0.40755200386047363
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,1,float16,float16,0,0.28757333755493164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,1,float16,fp8,0,0.2916693290074666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,1,float16,float16,0,0.2797226707140605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,1,float16,fp8,0,0.2773333390553792
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,1,fp8,fp8,0,0.26077866554260254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,1,fp8,fp8,0,0.2515626748402913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,1,float16,fp8,0,0.2783679962158203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,1,fp8,fp8,0,0.24883200724919638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,1,float16,float16,0,0.28039999802907306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,1,float16,fp8,0,0.2797226707140605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,1,fp8,fp8,0,0.2491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,1,float16,float16,0,1.7491626739501953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,1,float16,fp8,0,1.749834696451823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,1,fp8,fp8,0,1.574735959370931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,1,float16,float16,0,1.7733972867329915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,1,float16,fp8,0,1.760426680246989
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,1,fp8,fp8,0,1.582597255706787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,1,float16,float16,0,1.8324426015218098
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,1,float16,fp8,0,1.7826132774353027
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,1,fp8,fp8,0,1.6058026949564617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,1,float16,float16,0,0.9705813725789388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,1,float16,fp8,0,1.005738655726115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,1,fp8,fp8,0,0.8820052941640218
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,1,float16,float16,0,0.9477173487345377
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,1,float16,fp8,0,0.9432746569315592
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,1,float16,float16,0,0.950106700261434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,1,float16,fp8,0,0.9251893361409506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,1,fp8,fp8,0,0.8360959688822428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,1,float16,float16,0,0.927232027053833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,1,float16,fp8,0,0.9620479742685953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,1,fp8,fp8,0,0.8410453001658121
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,1,float16,float16,0,0.5427199999491373
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,1,float16,fp8,0,0.5471573273340861
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,1,float16,float16,0,0.5128639936447144
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,1,fp8,fp8,0,0.4858880043029785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,1,float16,fp8,0,0.5128533442815145
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,1,fp8,fp8,0,0.45687464872996014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,1,float16,float16,0,0.516266663869222
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,1,float16,fp8,0,0.5196799834569296
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,1,fp8,fp8,0,0.45789865652720135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,1,float16,float16,0,0.5234293142954508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,1,float16,fp8,0,0.5234346787134806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,1,fp8,fp8,0,0.4657493432362874
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,1,float16,float16,0,0.31249066193898517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,1,float16,fp8,0,0.3179519971211751
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,1,float16,float16,0,0.2974720001220703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,1,fp8,fp8,0,0.28279467423756915
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,1,float16,fp8,0,0.2964479923248291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,1,fp8,fp8,0,0.2653866608937581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,1,float16,float16,0,0.2995199958483378
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,1,float16,fp8,0,0.2988373239835103
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,1,fp8,fp8,0,0.26948267221450806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,1,float16,float16,0,0.2998613317807515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,1,float16,fp8,0,0.306005338827769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,1,fp8,fp8,0,0.27186665932337445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,1,float16,float16,0,0.20189867417017618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,1,float16,fp8,0,0.20087466637293497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,1,fp8,fp8,0,0.1858560045560201
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,1,float16,float16,0,0.19336533546447754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,1,float16,fp8,0,0.19660800695419312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,1,fp8,fp8,0,0.17561600605646768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,1,float16,float16,0,0.19506667057673135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,1,float16,fp8,0,0.19370667139689127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,1,fp8,fp8,0,0.17664533853530884
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,1,float16,float16,0,0.1933599909146627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,1,float16,fp8,0,0.19746132691701254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,1,fp8,fp8,0,0.1764693260192871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,1,float16,float16,0,1.8244266510009766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,1,float16,fp8,0,1.8298880259195964
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,1,fp8,fp8,0,1.6824320157368977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,1,float16,float16,0,1.829546610514323
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,1,float16,fp8,0,1.8309119542439778
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,1,fp8,fp8,0,1.6991626421610515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,1,float16,float16,0,1.8723840713500977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,1,float16,fp8,0,1.8582186698913574
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,1,float16,float16,0,0.27801599105199176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,1,fp8,fp8,0,1.728000005086263
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,1,float16,float16,0,1.0289493401845295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,1,float16,float16,0,0.9490773677825928
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,1,float16,fp8,0,1.0238186518351238
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,1,float16,fp8,0,0.9600000381469727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,1,fp8,fp8,0,0.8721013069152832
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,1,fp8,fp8,0,0.943615992863973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,1,float16,float16,0,0.9647786617279053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,1,float16,fp8,0,0.9729706446329752
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,1,fp8,fp8,0,0.8755199909210205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,1,float16,float16,0,0.9777440230051676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,1,float16,fp8,0,0.986624002456665
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,1,fp8,fp8,0,0.8891733487447103
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,1,float16,float16,0,0.5464746554692587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,1,float16,fp8,0,0.5570559899012247
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,1,fp8,fp8,0,0.504314661026001
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,1,float16,float16,0,0.5104639927546183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,1,float16,fp8,0,0.5155839920043945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,1,fp8,fp8,0,0.826527992884318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,1,fp8,fp8,0,0.46779731909434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,1,float16,float16,0,0.514901320139567
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,1,float16,fp8,0,0.5207039912541708
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,1,fp8,fp8,0,0.4684799909591675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,1,float16,float16,0,0.5227520068486532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,1,float16,fp8,0,0.5241173505783081
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,1,fp8,fp8,0,0.4773546854654948
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,1,float16,float16,0,0.30429333448410034
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,1,float16,fp8,0,0.3118133346239726
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,1,float16,float16,0,0.2845013340314229
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,1,float16,fp8,0,0.283135990301768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,1,fp8,fp8,0,0.262501339117686
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,1,fp8,fp8,0,0.28246400753657025
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,1,float16,float16,0,0.2892746726671855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,1,float16,fp8,0,0.2855253418286641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,1,fp8,fp8,0,0.26641066869099933
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,1,float16,float16,0,0.29234667619069415
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,1,float16,fp8,0,0.2943999965985616
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,1,fp8,fp8,0,0.26743467648824054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,1,float16,float16,0,0.18397865692774454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,1,float16,fp8,0,0.18311466773351034
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,1,float16,float16,0,0.16929600636164346
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,1,float16,fp8,0,0.17271467049916586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,1,fp8,fp8,0,0.17185600598653158
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,1,fp8,fp8,0,0.15530666708946228
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,1,float16,float16,0,0.16930667559305826
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,1,float16,fp8,0,0.17391467094421387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,1,fp8,fp8,0,0.15718400478363037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,1,float16,float16,0,0.1728853384653727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,1,float16,fp8,0,0.17271467049916586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,1,fp8,fp8,0,0.1616213321685791
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,1,float16,float16,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,1,float16,fp8,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,1,float16,float16,0,0.115365336338679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,1,fp8,fp8,0,0.11127466956774394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,1,float16,fp8,0,0.11571199695269267
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,1,fp8,fp8,0,0.10854400197664897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,1,float16,fp8,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,1,fp8,fp8,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,1,float16,float16,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,1,float16,fp8,0,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,1,fp8,fp8,0,0.10820266604423523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,1,float16,float16,0,1.20797864596049
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,1,float16,fp8,0,1.2165119647979736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,1,fp8,fp8,0,1.1433013280232747
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,1,float16,float16,0,1.218570629755656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,1,float16,fp8,0,1.2120746771494548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,1,fp8,fp8,0,1.1518293221791585
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,1,float16,float16,0,1.2482506434122722
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,1,float16,fp8,0,1.2274346351623535
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,1,fp8,fp8,0,1.174021323521932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,1,float16,float16,0,0.6891520023345947
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,1,float16,fp8,0,0.6973439852396647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,1,fp8,fp8,0,0.6466613213221232
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,1,float16,float16,0,0.639141321182251
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,1,float16,fp8,0,0.6360746622085571
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,1,fp8,fp8,0,0.5949440002441406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,1,float16,float16,0,0.6353919903437296
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,1,float16,fp8,0,0.6456319888432821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,1,fp8,fp8,0,0.5976746479670206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,1,float16,float16,0,0.6517759958902994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,1,float16,fp8,0,0.6463040113449097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,1,fp8,fp8,0,0.6075679858525594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,1,float16,float16,0,0.3712000052134196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,1,float16,fp8,0,0.37803200880686444
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,1,float16,float16,0,0.3452586730321248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,1,float16,fp8,0,0.3452586730321248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,1,fp8,fp8,0,0.34935466448465985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,1,fp8,fp8,0,0.32307199637095135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,1,float16,float16,0,0.3490133285522461
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,1,float16,fp8,0,0.3476479848225911
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,1,fp8,fp8,0,0.32307199637095135
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,1,float16,float16,0,0.3548159996668498
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,1,float16,fp8,0,0.355840007464091
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,1,fp8,fp8,0,0.33023999134699505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,1,float16,float16,0,0.21009065707524618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,1,float16,float16,0,0.19165333112080893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,1,float16,fp8,0,0.21606399615605673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,1,float16,fp8,0,0.1919999917348226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,1,fp8,fp8,0,0.1991680065790812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,1,fp8,fp8,0,0.18090667327245077
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,1,float16,float16,0,0.1962666710217794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,1,float16,fp8,0,0.19353600343068442
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,1,fp8,fp8,0,0.18449066082636514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,1,float16,float16,0,0.19950934251149496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,1,float16,fp8,0,0.20053333044052124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,1,fp8,fp8,0,0.18824533621470133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,1,float16,float16,0,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,1,float16,float16,0,0.12118400136629741
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,1,float16,fp8,0,0.1225333313147227
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,1,float16,fp8,0,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,1,fp8,fp8,0,0.12527466813723245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,1,fp8,fp8,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,1,float16,float16,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,1,float16,fp8,0,0.12220266461372375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,1,fp8,fp8,0,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,1,float16,float16,0,0.12185600399971008
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,1,float16,fp8,0,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,1,fp8,fp8,0,0.11504000425338745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,1,float16,fp8,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,1,fp8,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,1,float16,fp8,0,0.07851733267307281
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,1,fp8,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,1,fp8,fp8,0,0.07429333527882893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,1,float16,float16,0,1.3796693483988445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,1,float16,fp8,0,1.3929813702901204
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,1,fp8,fp8,0,1.3503146171569824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,1,float16,float16,0,1.3902506828308105
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,1,float16,fp8,0,1.3981013298034668
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,1,fp8,fp8,0,1.36738125483195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,1,float16,float16,0,1.407317320505778
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,1,float16,fp8,0,1.4165333112080891
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,1,float16,float16,0,0.7714133262634277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,1,fp8,fp8,0,1.387514591217041
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,1,float16,fp8,0,0.7734613418579102
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,1,fp8,fp8,0,0.7635626792907715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,1,float16,float16,0,0.710314671198527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,1,float16,fp8,0,0.7123573621114095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,1,fp8,fp8,0,0.6939307053883871
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,1,float16,float16,0,0.7137333552042643
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,1,float16,fp8,0,0.7178186575571696
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,1,fp8,fp8,0,0.697002649307251
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,1,float16,float16,0,0.7280639807383219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,1,float16,fp8,0,0.7229440212249756
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,1,float16,float16,0,0.40720534324645996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,1,fp8,fp8,0,0.7123626867930094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,1,float16,float16,0,0.37426666418711346
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,1,float16,fp8,0,0.4140373468399048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,1,float16,fp8,0,0.3739306529362996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,1,fp8,fp8,0,0.3993599812189738
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,1,fp8,fp8,0,0.36403199036916095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,1,float16,float16,0,0.37768534819285077
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,1,float16,fp8,0,0.37836798032124835
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,1,fp8,fp8,0,0.3667626778284709
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,1,float16,float16,0,0.3824640115102132
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,1,float16,fp8,0,0.38519465923309326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,1,fp8,fp8,0,0.3742773135503133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,1,float16,float16,0,0.22289599974950156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,1,float16,float16,0,0.20035733779271445
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,1,float16,fp8,0,0.22802132368087769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,1,float16,fp8,0,0.19985065857569376
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,1,fp8,fp8,0,0.2205066680908203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,1,fp8,fp8,0,0.19781333208084106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,1,float16,float16,0,0.20428800582885742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,1,float16,fp8,0,0.20497065782546997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,1,fp8,fp8,0,0.202400008837382
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,1,float16,float16,0,0.21026132504145303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,1,float16,fp8,0,0.21229867140452066
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,1,fp8,fp8,0,0.20428266127904257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,1,float16,float16,0,0.12902933359146118
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,1,float16,fp8,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,1,float16,fp8,0,0.11878400047620137
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,1,fp8,fp8,0,0.1262933313846588
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,1,fp8,fp8,0,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,1,float16,float16,0,0.11572800079981486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,1,float16,fp8,0,0.11844266454378764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,1,fp8,fp8,0,0.11161599556605022
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,1,float16,float16,0,0.11844266454378764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,1,float16,fp8,0,0.11844266454378764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,1,fp8,fp8,0,0.11468799908955891
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,1,float16,fp8,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,1,float16,float16,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,1,fp8,fp8,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,1,fp8,fp8,0,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,1,float16,float16,0,0.07884266475836436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,1,float16,fp8,0,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,1,fp8,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,1,float16,float16,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,1,fp8,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,1,fp8,fp8,0,0.05598933498064677
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,1,float16,fp8,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,1,fp8,fp8,0,0.05426666637261709
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,1,fp8,fp8,0,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,1,float16,float16,0,1.0002773602803547
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,1,float16,fp8,0,1.0091520150502522
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,1,fp8,fp8,0,1.0019839604695637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,1,float16,float16,0,1.0146133104960124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,1,float16,fp8,0,1.0074453353881836
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,1,fp8,fp8,0,1.0019839604695637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,1,float16,float16,0,1.0248533089955647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,1,float16,fp8,0,1.0245119730631511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,1,float16,float16,0,0.5611519813537598
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,1,fp8,fp8,0,1.0187093416849773
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,1,float16,fp8,0,0.566271980603536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,1,fp8,fp8,0,0.5635573466618856
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,1,float16,float16,0,0.510810653368632
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,1,float16,fp8,0,0.5073920090993246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,1,fp8,fp8,0,0.5121599833170573
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,1,float16,float16,0,0.5125120083491007
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,1,float16,fp8,0,0.5118293364842733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,1,fp8,fp8,0,0.5101226568222046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,1,float16,float16,0,0.5196799834569296
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,1,float16,fp8,0,0.5220746596654257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,1,fp8,fp8,0,0.5220586856206259
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,1,float16,float16,0,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,1,float16,float16,0,0.2688000003496806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,1,float16,fp8,0,0.3015679915746053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,1,fp8,fp8,0,0.2984960079193115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,1,fp8,fp8,0,0.27188267310460407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,1,float16,float16,0,0.2725546757380168
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,1,float16,fp8,0,0.27153066794077557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,1,fp8,fp8,0,0.27324267228444415
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,1,float16,float16,0,0.2786986629168193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,1,float16,fp8,0,0.279039998849233
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,1,fp8,fp8,0,0.2773333390553792
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,1,float16,float16,0,0.16640533010164896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,1,float16,fp8,0,0.16605866948763529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,1,float16,float16,0,0.145578662554423
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,1,fp8,fp8,0,0.16588800152142844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,1,float16,fp8,0,0.14506666858990988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,1,fp8,fp8,0,0.14524799585342407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,1,float16,float16,0,0.145578662554423
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,1,float16,fp8,0,0.1462613344192505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,1,fp8,fp8,0,0.15103999773661295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,1,float16,float16,0,0.1513813336690267
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,1,float16,fp8,0,0.1534293293952942
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,1,fp8,fp8,0,0.155648003021876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,1,float16,float16,0,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,1,float16,fp8,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,1,fp8,fp8,0,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,1,float16,float16,0,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,1,float16,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,1,fp8,fp8,0,0.08669333656628926
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,1,float16,float16,0,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,1,float16,fp8,0,0.09010666608810425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,1,fp8,fp8,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,1,float16,float16,0,0.0911253293355306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,1,float16,fp8,0,0.09147733449935913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,1,fp8,fp8,0,0.0897706647713979
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,1,float16,float16,0,0.056661332647005715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,1,float16,fp8,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,1,float16,fp8,0,0.05733866492907206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,1,fp8,fp8,0,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,1,float16,float16,0,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,1,float16,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,1,float16,float16,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,1,fp8,fp8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,1,float16,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,1,float16,float16,0,1.076053301493327
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,1,float16,fp8,0,1.0692266623179119
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,1,fp8,fp8,0,1.1303253173828125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,1,float16,float16,0,1.0873066584269206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,1,float16,fp8,0,1.0900479952494304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,1,fp8,fp8,0,1.1572853724161785
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,1,float16,float16,0,1.0978986422220867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,1,float16,fp8,0,1.098581314086914
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,1,fp8,fp8,0,1.19978133837382
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,1,float16,float16,0,0.6031359831492106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,1,float16,fp8,0,0.5973333517710367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,1,fp8,fp8,0,0.637440005938212
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,1,float16,float16,0,0.5481813351313273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,1,float16,fp8,0,0.5457973480224609
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,1,fp8,fp8,0,0.5782186587651571
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,1,float16,float16,0,0.5546666781107584
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,1,float16,fp8,0,0.5533013343811035
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,1,fp8,fp8,0,0.5942560036977133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,1,float16,float16,0,0.5659306844075521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,1,float16,fp8,0,0.559445341428121
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,1,fp8,fp8,0,0.5983573198318481
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,1,float16,float16,0,0.31487999359766644
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,1,float16,fp8,0,0.31214932600657147
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,1,fp8,fp8,0,0.3295573393503825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,1,float16,float16,0,0.28587732712427777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,1,float16,fp8,0,0.2827999989191691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,1,fp8,fp8,0,0.2991786599159241
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,1,float16,float16,0,0.28893866141637164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,1,float16,fp8,0,0.2892746726671855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,1,float16,fp8,0,0.2688000003496806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,1,fp8,fp8,0,0.30531734228134155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,1,float16,float16,0,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,1,float16,fp8,0,0.2913279930750529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,1,fp8,fp8,0,0.3097599943478902
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,1,float16,float16,0,0.17151999473571777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,1,float16,fp8,0,0.1675999959309896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,1,fp8,fp8,0,0.176639993985494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,1,float16,float16,0,0.15496533115704855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,1,float16,fp8,0,0.15172800421714783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,1,fp8,fp8,0,0.15598932902018228
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,1,float16,float16,0,0.15615466237068176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,1,float16,fp8,0,0.155648003021876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,1,fp8,fp8,0,0.16195733348528543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,1,float16,float16,0,0.15940266847610474
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,1,float16,fp8,0,0.15787200133005777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,1,fp8,fp8,0,0.16639999548594156
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,1,float16,float16,0,0.09898133079210918
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,1,float16,fp8,0,0.09490666786829631
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,1,fp8,fp8,0,0.10001066327095032
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,1,float16,float16,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,1,float16,fp8,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,1,float16,float16,0,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,1,float16,fp8,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,1,fp8,fp8,0,0.08771733442942302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,1,float16,float16,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,1,float16,fp8,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,1,fp8,fp8,0,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,1,float16,float16,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,1,float16,fp8,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,1,fp8,fp8,0,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,1,float16,float16,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,1,float16,fp8,0,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,1,fp8,fp8,0,0.05529066423575083
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,1,float16,float16,0,0.05905599892139435
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,1,float16,float16,0,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,1,float16,fp8,0,0.058373332023620605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,1,fp8,fp8,0,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,1,float16,float16,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,1,fp8,fp8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,1,float16,float16,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,1,fp8,fp8,0,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,1,fp8,fp8,0,0.03482133398453394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,1,float16,float16,0,0.9371306896209717
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,1,float16,fp8,0,0.9350826740264893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,1,fp8,fp8,0,1.0002773602803547
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,1,float16,float16,0,0.9586346944173177
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,1,float16,fp8,0,0.951807975769043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,1,fp8,fp8,0,1.0361119906107585
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,1,float16,float16,0,0.9712639649709066
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,1,float16,fp8,0,0.9634133179982504
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,1,float16,float16,0,0.5382826725641886
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,1,fp8,fp8,0,1.0504533449808757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,1,float16,fp8,0,0.5256533225377401
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,1,fp8,fp8,0,0.5761706829071045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,1,float16,float16,0,0.4780373175938924
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,1,float16,fp8,0,0.4763306776682536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,1,fp8,fp8,0,0.5125120083491007
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,1,float16,float16,0,0.4886293411254883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,1,float16,fp8,0,0.4862240155537923
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,1,fp8,fp8,0,0.5287253459294637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,1,float16,float16,0,0.49510399500528973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,1,float16,fp8,0,0.49373865127563477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,1,float16,float16,0,0.2821120023727417
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,1,fp8,fp8,0,0.5345280170440674
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,1,float16,float16,0,0.24781332413355509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,1,float16,fp8,0,0.27562665939331055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,1,float16,fp8,0,0.24678399165471396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,1,fp8,fp8,0,0.2988319993019104
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,1,fp8,fp8,0,0.26469866434733075
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,1,float16,float16,0,0.2529226740201314
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,1,float16,fp8,0,0.25224532683690387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,1,fp8,fp8,0,0.27460267146428424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,1,float16,float16,0,0.2577066620190938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,1,float16,fp8,0,0.2563413381576538
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,1,fp8,fp8,0,0.2769920031229655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,1,float16,float16,0,0.15155200163523355
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,1,float16,float16,0,0.13346133629480997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,1,float16,fp8,0,0.1474560002485911
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,1,float16,fp8,0,0.13447466492652893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,1,fp8,fp8,0,0.16076800227165222
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,1,fp8,fp8,0,0.14011733730634054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,1,float16,float16,0,0.13738666971524557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,1,float16,fp8,0,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,1,fp8,fp8,0,0.14591999848683676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,1,float16,float16,0,0.13772799571355185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,1,float16,fp8,0,0.13738666971524557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,1,fp8,fp8,0,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,1,float16,float16,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,1,float16,fp8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,1,float16,float16,0,0.07714133461316426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,1,float16,fp8,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,1,fp8,fp8,0,0.09113599856694539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,1,fp8,fp8,0,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,1,float16,float16,0,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,1,float16,fp8,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,1,float16,fp8,0,0.07781333227952321
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,1,float16,float16,0,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,1,float16,float16,0,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,1,float16,fp8,0,0.0484746644894282
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,1,fp8,fp8,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,1,float16,float16,0,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,1,float16,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,1,float16,fp8,0,0.04915733138720194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,1,fp8,fp8,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,1,float16,float16,0,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,1,float16,float16,0,0.03346133232116699
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,1,float16,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,1,float16,float16,0,0.03312533348798752
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,1,float16,fp8,0,0.03482133398453394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,1,fp8,fp8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,1,float16,float16,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,1,float16,float16,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,1,float16,float16,0,0.3667626778284709
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,1,fp8,fp8,0,0.385535995165507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,1,float16,float16,0,0.3763306538263957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,1,float16,fp8,0,0.3725653489430745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,1,fp8,fp8,0,0.400383989016215
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,1,float16,float16,0,0.3831466833750407
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,1,float16,fp8,0,0.38246933619181317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,1,fp8,fp8,0,0.4068693319956462
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,1,float16,float16,0,0.23176532983779907
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,1,float16,fp8,0,0.22630399465560913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,1,float16,float16,0,0.1919999917348226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,1,fp8,fp8,0,0.23414933681488037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,1,float16,fp8,0,0.19165867567062378
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,1,fp8,fp8,0,0.20190399885177612
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,1,float16,float16,0,0.19814399878184
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,1,float16,fp8,0,0.19797333081563315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,1,fp8,fp8,0,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,1,float16,float16,0,0.20309333006540933
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,1,float16,fp8,0,0.19984533389409384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,1,fp8,fp8,0,0.212991992632548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,1,float16,float16,0,0.1283466617266337
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,1,float16,fp8,0,0.1267680029074351
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,1,fp8,fp8,0,0.12902933359146118
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,1,float16,float16,0,0.10854400197664897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,1,float16,fp8,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,1,fp8,fp8,0,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,1,float16,float16,0,0.1109386682510376
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,1,float16,fp8,0,0.1109279990196228
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,1,fp8,fp8,0,0.11469866832097371
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,1,float16,float16,0,0.11365333199501038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,1,fp8,fp8,0,0.11570666233698527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,1,float16,float16,0,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,1,float16,fp8,0,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,1,fp8,fp8,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,1,float16,float16,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,1,float16,fp8,0,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,1,fp8,fp8,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,1,float16,float16,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,1,float16,fp8,0,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,1,fp8,fp8,0,0.06246933341026306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,1,float16,float16,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,1,float16,fp8,0,0.06552533308664958
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,1,fp8,fp8,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,1,float16,fp8,0,0.04641599953174591
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,1,float16,float16,0,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,1,float16,fp8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,1,fp8,fp8,0,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,1,fp8,fp8,0,0.042319998145103455
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,1,float16,float16,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,1,float16,fp8,0,0.03173866619666418
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,1,float16,float16,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,1,fp8,fp8,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,1,float16,float16,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,1,fp8,fp8,0,0.024901332954565685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,1,fp8,fp8,0,0.02219199885924657
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,1,float16,float16,0,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,1,float16,float16,0,0.20377600193023682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,1,float16,fp8,0,0.203274667263031
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,1,fp8,fp8,0,0.21196800470352173
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,1,float16,float16,0,0.20855466524759927
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,1,float16,fp8,0,0.20889067649841309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,1,fp8,fp8,0,0.2205066680908203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,1,float16,float16,0,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,1,float16,fp8,0,0.21094399690628052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,1,float16,fp8,0,0.02626666675011317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,1,fp8,fp8,0,0.22288533051808676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,1,float16,float16,0,0.12936000029246011
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,1,float16,fp8,0,0.36505599816640216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,1,float16,fp8,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,1,float16,float16,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,1,fp8,fp8,0,0.13295466701189676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,1,float16,fp8,0,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,1,float16,float16,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,1,float16,fp8,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,1,fp8,fp8,0,0.12117866675059001
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,1,float16,float16,0,0.11674132943153381
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,1,float16,fp8,0,0.11503466963768005
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,1,fp8,fp8,0,0.12288000186284383
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,1,float16,float16,0,0.07373333474000295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,1,float16,fp8,0,0.07270933190981548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,1,float16,fp8,0,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,1,fp8,fp8,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,1,fp8,fp8,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,1,float16,fp8,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,1,float16,float16,0,0.0433599998553594
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,1,float16,fp8,0,0.04404266675313314
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,1,float16,float16,0,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,1,fp8,fp8,0,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,1,float16,fp8,0,0.04164800047874451
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,1,float16,fp8,0,0.04233600199222565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,1,float16,float16,0,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,1,float16,fp8,0,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,1,fp8,fp8,0,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,1,fp8,fp8,0,0.03072533259789149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,1,float16,fp8,0,0.03141333411137263
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,1,float16,float16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,1,fp8,fp8,0,0.03002133220434189
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,1,float16,float16,0,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,1,fp8,fp8,0,0.02275199939807256
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,1,float16,fp8,0,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,1,float16,float16,0,0.018858666221300762
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,1,float16,float16,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,1,fp8,fp8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,1,float16,float16,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,1,float16,float16,0,0.14044266939163208
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,1,float16,fp8,0,0.1397706667582194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,1,fp8,fp8,0,0.14984533190727234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,1,float16,float16,0,0.143696000178655
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,1,float16,fp8,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,1,fp8,fp8,0,0.15684266885121664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,1,float16,float16,0,0.14436800281206766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,1,float16,fp8,0,0.14199466506640115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,1,fp8,fp8,0,0.15889599919319153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,1,float16,float16,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,1,float16,fp8,0,0.08602133393287659
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,1,fp8,fp8,0,0.09660266836484273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,1,float16,float16,0,0.07885333398977916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,1,fp8,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,1,float16,float16,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,1,float16,fp8,0,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,1,fp8,fp8,0,0.08327466746171315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,1,float16,float16,0,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,1,fp8,fp8,0,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,1,float16,float16,0,0.050186668833096824
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,1,float16,fp8,0,0.04950400193532308
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,1,fp8,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,1,float16,float16,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,1,float16,fp8,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,1,fp8,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,1,float16,float16,0,0.04948266843954722
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,1,float16,float16,0,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,1,fp8,fp8,0,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,1,float16,float16,0,0.03412266572316488
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,1,float16,float16,0,0.025194667279720306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,1,float16,float16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,1,fp8,fp8,0,0.01851733277241389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,1,float16,float16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,1,fp8,fp8,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,1,float16,float16,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,1,float16,fp8,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,1,float16,float16,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,1,float16,fp8,0,0.018650667121013004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,1,float16,float16,0,0.01798933371901512
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,1,fp8,fp8,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,1,fp8,fp8,0,0.018325333793958027
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,1,float16,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,1,float16,fp8,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,1,float16,float16,0,0.017312000195185345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,1,fp8,fp8,0,0.01764800027012825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,1,float16,float16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,1,float16,fp8,0,0.01860800012946129
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,1,float16,float16,0,0.11263466874758403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,1,float16,fp8,0,0.11297600467999776
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,1,fp8,fp8,0,0.12288000186284383
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,1,float16,float16,0,0.1129866639773051
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,1,float16,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,1,fp8,fp8,0,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,1,float16,float16,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,1,float16,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,1,fp8,fp8,0,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,1,float16,float16,0,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,1,float16,fp8,0,0.06655466556549072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,1,float16,float16,0,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,1,fp8,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,1,float16,fp8,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,1,float16,fp8,0,0.06518933176994324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,1,float16,float16,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,1,float16,float16,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,1,float16,fp8,0,0.04301333427429199
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,1,fp8,fp8,0,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,1,float16,float16,0,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,1,float16,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,1,float16,float16,0,0.03071466585000356
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,1,float16,fp8,0,0.029002666473388672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,1,float16,fp8,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,1,float16,float16,0,0.022805333137512207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,1,fp8,fp8,0,0.02276800076166789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,1,float16,float16,0,0.022469334304332733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,1,float16,fp8,0,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,1,fp8,fp8,0,0.0227360005180041
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,1,float16,fp8,0,0.022757334013779957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,1,fp8,fp8,0,0.02276266614596049
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,1,float16,float16,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,1,float16,fp8,0,0.022810667753219604
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,1,fp8,fp8,0,0.02276800076166789
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,1,float16,fp8,0,0.018522666146357853
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,1,float16,fp8,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,1,float16,float16,0,0.016496000190575916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,1,float16,float16,0,0.0164533331990242
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,1,fp8,fp8,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,1,float16,float16,0,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,1,fp8,fp8,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,1,fp8,fp8,0,0.016682667036851246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,1,float16,float16,0,0.01646399994691213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,1,fp8,fp8,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,1,float16,float16,0,0.016538667182127636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,1,float16,fp8,0,0.017653333644072216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,1,float16,float16,0,0.10035199920336406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,1,float16,fp8,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,1,fp8,fp8,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,1,float16,fp8,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,1,float16,float16,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,1,fp8,fp8,0,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,1,float16,float16,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,1,float16,fp8,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,1,fp8,fp8,0,0.10956266522407532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,1,float16,float16,0,0.059402664502461754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,1,float16,fp8,0,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,1,float16,fp8,0,0.03719466676314672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,1,fp8,fp8,0,0.04028266668319702
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,1,fp8,fp8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,1,float16,fp8,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,1,float16,float16,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,1,float16,float16,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,1,float16,fp8,0,0.025936000049114227
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,1,float16,fp8,0,0.020746666938066483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,1,fp8,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,1,float16,float16,0,0.021381333470344543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,1,fp8,fp8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,1,float16,float16,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,1,float16,float16,0,0.017621333400408428
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,1,float16,float16,0,0.017498667041460674
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,1,float16,float16,0,0.016602666427691776
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,1,fp8,fp8,0,0.01657066618402799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,1,float16,float16,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,1,fp8,fp8,0,0.016469333320856094
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,1,float16,float16,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,1,float16,float16,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,1,fp8,fp8,0,0.016575999557971954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,1,fp8,fp8,0,0.01657066618402799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,1,float16,float16,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,1,float16,float16,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,1,float16,float16,0,0.016629333297411602
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,1,float16,float16,0,3.2709973653157554
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,1,float16,fp8,0,3.399850527445475
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,1,fp8,fp8,0,2.8538878758748374
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,1,float16,fp8,0,3.3640054066975913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,1,float16,float16,0,3.3576958974202475
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,1,fp8,fp8,0,2.8641281127929688
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,1,float16,float16,0,3.3696425755818686
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,1,float16,float16,0,1.7672533988952637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,1,fp8,fp8,0,2.8823893864949546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,1,float16,fp8,0,3.4348319371541343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,1,float16,fp8,0,1.8256212870279949
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,1,float16,float16,0,1.75598939259847
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,1,float16,fp8,0,1.7652053833007812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,1,fp8,fp8,0,1.500501314798991
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,1,fp8,fp8,0,1.545904000600179
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,1,float16,float16,0,1.713322639465332
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,1,float16,fp8,0,1.746773401896159
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,1,fp8,fp8,0,1.5189332962036133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,1,float16,float16,0,1.741653283437093
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,1,float16,float16,0,0.9705813725789388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,1,float16,fp8,0,1.7273173332214355
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,1,fp8,fp8,0,1.5213227272033691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,1,float16,fp8,0,0.999936024347941
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,1,float16,float16,0,0.9627306461334229
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,1,float16,fp8,0,0.97979736328125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,1,fp8,fp8,0,0.859824021657308
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,1,fp8,fp8,0,0.8343946933746338
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,1,float16,float16,0,0.9671680132548014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,1,float16,fp8,0,0.9859413305918375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,1,fp8,fp8,0,0.8418986797332764
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,1,float16,float16,0,0.9838933149973551
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,1,float16,fp8,0,0.9767253398895264
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,1,fp8,fp8,0,0.8458293279012045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,1,float16,float16,0,0.5864106814066569
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,1,float16,float16,0,0.5696853399276733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,1,float16,fp8,0,0.5864106814066569
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,1,fp8,fp8,0,0.5104639927546183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,1,float16,fp8,0,0.5737813313802084
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,1,fp8,fp8,0,0.49885865052541095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,1,float16,float16,0,0.5676373243331909
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,1,float16,fp8,0,0.5669600168863932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,1,fp8,fp8,0,0.5019306739171346
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,1,float16,float16,0,0.5754506587982178
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,1,float16,fp8,0,0.5771946509679159
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,1,fp8,fp8,0,0.5039466619491577
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,1,float16,float16,0,1.9947412808736165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,1,float16,fp8,0,1.9930399258931477
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,1,fp8,fp8,0,1.729365348815918
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,1,float16,float16,0,1.959935983022054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,1,float16,fp8,0,2.011136054992676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,1,fp8,fp8,0,1.7460907300313313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,1,float16,float16,0,2.0694986979166665
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,1,float16,fp8,0,1.9821227391560872
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,1,float16,float16,0,1.12008531888326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,1,fp8,fp8,0,1.759402592976888
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,1,float16,fp8,0,1.1262293656667073
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,1,float16,float16,0,1.0852693716684978
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,1,fp8,fp8,0,0.9644373257954916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,1,float16,fp8,0,1.094480037689209
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,1,fp8,fp8,0,0.9296267032623291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,1,float16,float16,0,1.0907306671142578
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,1,float16,fp8,0,1.0781013170878093
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,1,fp8,fp8,0,0.9378133614857992
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,1,float16,float16,0,1.0989226500193279
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,1,float16,float16,0,0.6283946832021078
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,1,float16,fp8,0,1.1054080327351887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,1,fp8,fp8,0,0.9432746569315592
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,1,float16,fp8,0,0.6284000078837076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,1,float16,float16,0,0.6031253337860107
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,1,float16,fp8,0,0.5993813276290894
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,1,fp8,fp8,0,0.5481813351313273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,1,fp8,fp8,0,0.5280426740646362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,1,float16,float16,0,0.6109866698582967
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,1,float16,fp8,0,0.6038186550140381
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,1,fp8,fp8,0,0.5328160127003988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,1,float16,float16,0,0.6150826613108317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,1,float16,fp8,0,0.6191893418629965
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,1,fp8,fp8,0,0.5348693529764811
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,1,float16,float16,0,0.37939198811848956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,1,float16,float16,0,0.36266668637593585
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,1,float16,fp8,0,0.37939198811848956
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,1,float16,fp8,0,0.3691519896189372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,1,fp8,fp8,0,0.3380906581878662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,1,fp8,fp8,0,0.32546132802963257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,1,float16,float16,0,0.3667626778284709
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,1,float16,fp8,0,0.36950401465098065
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,1,fp8,fp8,0,0.32819199562072754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,1,float16,float16,0,0.3647093375523885
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,1,float16,fp8,0,0.3691466649373372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,1,fp8,fp8,0,0.33160533507664997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,1,float16,float16,0,1.489583969116211
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,1,float16,fp8,0,1.4515199661254883
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,1,fp8,fp8,0,1.2760746479034424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,1,float16,float16,0,1.4639786084493
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,1,float16,fp8,0,1.5011839866638184
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,1,fp8,fp8,0,1.2839306990305583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,1,float16,float16,0,1.465354601542155
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,1,float16,fp8,0,1.5336106618245442
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,1,float16,float16,0,0.8197066783905029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,1,fp8,fp8,0,1.2958719730377197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,1,float16,fp8,0,0.8347307046254476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,1,fp8,fp8,0,0.7215786774953207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,1,float16,float16,0,0.7896746794382731
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,1,float16,fp8,0,0.8002560138702393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,1,fp8,fp8,0,0.6922187010447184
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,1,float16,float16,0,0.8108373483022054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,1,float16,fp8,0,0.7992266813913981
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,1,fp8,fp8,0,0.6949546337127686
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,1,float16,float16,0,0.812544027964274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,1,float16,float16,0,0.47121067841847736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,1,float16,fp8,0,0.8200639883677164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,1,fp8,fp8,0,0.7028106848398844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,1,float16,fp8,0,0.47223468621571857
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,1,float16,float16,0,0.45414932568868
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,1,float16,fp8,0,0.44835734367370605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,1,fp8,fp8,0,0.41915734608968097
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,1,fp8,fp8,0,0.3993599812189738
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,1,float16,float16,0,0.4585813283920288
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,1,float16,fp8,0,0.45176533857981366
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,1,fp8,fp8,0,0.40516265233357746
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,1,float16,float16,0,0.4613120158513387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,1,float16,fp8,0,0.46642665068308514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,1,fp8,fp8,0,0.4065279960632324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,1,float16,float16,0,0.29337600866953534
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,1,float16,float16,0,0.2834773262341817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,1,float16,fp8,0,0.2872320016225179
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,1,float16,fp8,0,0.2954346736272176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,1,fp8,fp8,0,0.2653866608937581
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,1,fp8,fp8,0,0.25598933299382526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,1,float16,float16,0,0.2882560094197591
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,1,float16,fp8,0,0.28587732712427777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,1,fp8,fp8,0,0.25702399015426636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,1,float16,float16,0,0.28518933057785034
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,1,float16,fp8,0,0.28962133328119916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,1,fp8,fp8,0,0.25701866547266644
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,1,float16,float16,0,1.9314346313476562
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,1,float16,fp8,0,1.8921866416931152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,1,fp8,fp8,0,1.6878933906555176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,1,float16,float16,0,1.9776852925618489
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,1,float16,fp8,0,1.9988479614257812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,1,fp8,fp8,0,1.6994986534118652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,1,float16,float16,0,1.9377493858337402
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,1,float16,fp8,0,1.9797333081563313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,1,float16,float16,0,1.0579626560211182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,1,fp8,fp8,0,1.7194666862487793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,1,float16,fp8,0,1.081173340479533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,1,fp8,fp8,0,0.9347519874572754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,1,float16,float16,0,0.9791200160980225
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,1,float16,fp8,0,1.010858694712321
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,1,fp8,fp8,0,0.8912160396575928
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,1,float16,float16,0,1.0306560198465984
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,1,float16,fp8,0,1.0231573581695557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,1,fp8,fp8,0,0.8970239957173666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,1,float16,float16,0,1.0180266698201497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,1,float16,float16,0,0.585045337677002
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,1,float16,fp8,0,1.0303146839141846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,1,fp8,fp8,0,0.9041919708251953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,1,float16,fp8,0,0.5888053178787231
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,1,float16,float16,0,0.5556906859079996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,1,float16,fp8,0,0.5522773265838623
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,1,fp8,fp8,0,0.5145599842071533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,1,fp8,fp8,0,0.4920320113499959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,1,float16,float16,0,0.562175989151001
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,1,float16,fp8,0,0.5573973258336385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,1,fp8,fp8,0,0.4957866668701172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,1,float16,float16,0,0.5690079927444458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,1,float16,fp8,0,0.5724159876505533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,1,fp8,fp8,0,0.4991999864578247
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,1,float16,float16,0,0.34116268157958984
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,1,float16,float16,0,0.31966400146484375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,1,float16,fp8,0,0.3432106574376424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,1,fp8,fp8,0,0.3036160071690877
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,1,float16,fp8,0,0.32546132802963257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,1,fp8,fp8,0,0.28654932975769043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,1,float16,float16,0,0.3275093237559001
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,1,float16,fp8,0,0.3261386752128601
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,1,fp8,fp8,0,0.2899679938952128
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,1,float16,float16,0,0.3285333315531413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,1,float16,fp8,0,0.33399466673533124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,1,float16,float16,0,0.21538132429122925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,1,fp8,fp8,0,0.2954240043958028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,1,float16,fp8,0,0.21572266022364298
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,1,float16,float16,0,0.212991992632548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,1,float16,fp8,0,0.211626668771108
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,1,fp8,fp8,0,0.19933867454528809
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,1,fp8,fp8,0,0.19114667177200317
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,1,float16,float16,0,0.21333332856496176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,1,float16,fp8,0,0.21230934063593546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,1,fp8,fp8,0,0.1945599913597107
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,1,float16,float16,0,0.21333332856496176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,1,float16,fp8,0,0.21435733636220297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,1,fp8,fp8,0,0.19250667095184326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,1,float16,float16,0,1.1830613613128662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,1,float16,fp8,0,1.1881813208262126
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,1,fp8,fp8,0,1.067519982655843
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,1,float16,float16,0,1.2120746771494548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,1,float16,fp8,0,1.2069546381632488
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,1,fp8,fp8,0,1.0821973482767742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,1,float16,float16,0,1.2393813133239746
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,1,float16,fp8,0,1.252351999282837
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,1,float16,float16,0,0.6744746367136637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,1,fp8,fp8,0,1.1016480127970378
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,1,float16,fp8,0,0.6925653616587321
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,1,fp8,fp8,0,0.605183998743693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,1,float16,float16,0,0.6391466856002808
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,1,float16,fp8,0,0.6490453481674194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,1,fp8,fp8,0,0.5713920195897421
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,1,float16,float16,0,0.652458667755127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,1,float16,fp8,0,0.6381226778030396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,1,fp8,fp8,0,0.578223983446757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,1,float16,float16,0,0.6655999819437662
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,1,float16,fp8,0,0.6649173498153687
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,1,float16,float16,0,0.3797333240509033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,1,fp8,fp8,0,0.5833386580149332
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,1,float16,fp8,0,0.38486401240030926
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,1,float16,float16,0,0.3592533270517985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,1,float16,fp8,0,0.355840007464091
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,1,fp8,fp8,0,0.3404853343963623
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,1,fp8,fp8,0,0.32341333230336505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,1,float16,float16,0,0.3647199869155884
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,1,float16,fp8,0,0.3599413235982259
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,1,fp8,fp8,0,0.32477867603302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,1,float16,float16,0,0.36983998616536456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,1,float16,fp8,0,0.3735893170038859
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,1,fp8,fp8,0,0.3295573393503825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,1,float16,float16,0,0.22425599892934164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,1,float16,float16,0,0.211626668771108
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,1,float16,fp8,0,0.22733332713445029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,1,fp8,fp8,0,0.2063360015551249
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,1,float16,fp8,0,0.21332800388336182
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,1,fp8,fp8,0,0.19097065925598145
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,1,float16,float16,0,0.21538132429122925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,1,float16,fp8,0,0.21742933988571167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,1,fp8,fp8,0,0.19217065970102945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,1,float16,float16,0,0.21640533208847046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,1,float16,fp8,0,0.21845332781473795
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,1,float16,float16,0,0.14284800489743552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,1,fp8,fp8,0,0.20053333044052124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,1,float16,fp8,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,1,float16,float16,0,0.14114133516947427
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,1,fp8,fp8,0,0.13381333152453104
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,1,fp8,fp8,0,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,1,float16,float16,0,0.14045866330464682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,1,float16,fp8,0,0.1430186629295349
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,1,fp8,fp8,0,0.1307413379351298
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,1,float16,float16,0,0.14114666978518167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,1,float16,fp8,0,0.1437013347943624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,1,fp8,fp8,0,0.13209600249926248
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,1,float16,fp8,0,1.2726613680521648
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,1,float16,float16,0,1.2588373025258381
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,1,fp8,fp8,0,1.135797341664632
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,1,float16,float16,0,1.2818773587544758
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,1,float16,fp8,0,1.2552533149719238
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,1,fp8,fp8,0,1.152511994043986
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,1,float16,fp8,0,1.284607966740926
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,1,float16,float16,0,1.2979199886322021
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,1,fp8,fp8,0,1.1709333260854085
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,1,float16,fp8,0,0.715440034866333
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,1,float16,float16,0,0.7007573445638021
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,1,fp8,fp8,0,0.6398293177286783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,1,float16,fp8,0,0.6601440111796061
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,1,float16,float16,0,0.6608159939448038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,1,fp8,fp8,0,0.5922133525212606
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,1,float16,fp8,0,0.6690133412679037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,1,float16,float16,0,0.6611626545588175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,1,fp8,fp8,0,0.5976746479670206
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,1,float16,float16,0,0.6826666990915934
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,1,float16,fp8,0,0.6761706670125326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,1,fp8,fp8,0,0.6109813451766968
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,1,float16,float16,0,0.3862186670303345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,1,float16,fp8,0,0.38279998302459717
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,1,fp8,fp8,0,0.34799468517303467
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,1,float16,float16,0,0.36027733484903973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,1,float16,fp8,0,0.35628799597422284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,1,fp8,fp8,0,0.32341333230336505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,1,float16,fp8,0,0.35949865976969403
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,1,fp8,fp8,0,0.3275039990743001
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,1,float16,float16,0,0.3657386700312297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,1,float16,fp8,0,0.36471466223398846
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,1,fp8,fp8,0,0.334330677986145
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,1,float16,float16,0,0.2167466680208842
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,1,float16,fp8,0,0.22118399540583292
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,1,fp8,fp8,0,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,1,float16,float16,0,0.20087466637293497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,1,float16,fp8,0,0.20155733823776245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,1,fp8,fp8,0,0.18568533658981323
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,1,float16,float16,0,0.20361600319544473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,1,float16,fp8,0,0.20360533396402994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,1,fp8,fp8,0,0.1884160041809082
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,1,float16,float16,0,0.20770132541656494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,1,float16,fp8,0,0.20940800507863364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,1,float16,float16,0,0.1327786644299825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,1,fp8,fp8,0,0.19473065932591757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,1,float16,fp8,0,0.1346560021241506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,1,fp8,fp8,0,0.12732799847920737
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,1,float16,float16,0,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,1,float16,fp8,0,0.12970133622487387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,1,fp8,fp8,0,0.11980266372362773
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,1,float16,float16,0,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,1,float16,fp8,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,1,fp8,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,1,float16,float16,0,0.13061333696047464
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,1,float16,fp8,0,0.13108266393343607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,1,float16,float16,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,1,fp8,fp8,0,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,1,fp8,fp8,0,0.08294400076071422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,1,float16,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,1,float16,float16,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,1,float16,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,1,fp8,fp8,0,0.08227199812730153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,1,float16,fp8,0,0.08636266986529033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,1,fp8,fp8,0,0.08022400240103404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,1,float16,float16,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,1,float16,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,1,fp8,fp8,0,0.08226666847864787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,1,float16,float16,0,0.8272213141123453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,1,float16,fp8,0,0.8190293312072754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,1,fp8,fp8,0,0.7758506933848063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,1,float16,float16,0,0.8289279937744141
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,1,float16,fp8,0,0.8367733160654703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,1,fp8,fp8,0,0.7867733637491862
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,1,float16,float16,0,0.854698657989502
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,1,float16,fp8,0,0.8441173235575358
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,1,fp8,fp8,0,0.8053759733835856
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,1,float16,float16,0,0.46540268262227374
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,1,float16,fp8,0,0.4766720136006673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,1,fp8,fp8,0,0.44287999471028644
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,1,float16,float16,0,0.43775999546051025
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,1,float16,fp8,0,0.43434667587280273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,1,fp8,fp8,0,0.40994131565093994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,1,float16,float16,0,0.44014934698740643
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,1,float16,fp8,0,0.14386666814486185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,1,float16,fp8,0,0.44356266657511395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,1,fp8,fp8,0,0.41335467497507733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,1,float16,float16,0,0.4517600138982137
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,1,float16,fp8,0,0.4527839819590251
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,1,fp8,fp8,0,0.4217173258463542
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,1,float16,float16,0,0.2604479988416036
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,1,float16,fp8,0,0.26316799720128375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,1,fp8,fp8,0,0.24473599592844644
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,1,float16,float16,0,0.23961599667867026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,1,float16,fp8,0,0.2392746607462565
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,1,fp8,fp8,0,0.2249386707941691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,1,float16,float16,0,0.24472532669703165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,1,float16,fp8,0,0.24541866779327393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,1,fp8,fp8,0,0.2290346622467041
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,1,float16,float16,0,0.25020267566045123
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,1,float16,fp8,0,0.24951465924580893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,1,fp8,fp8,0,0.23449599742889404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,1,float16,float16,0,0.15121066570281982
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,1,float16,fp8,0,0.1527466674645742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,1,fp8,fp8,0,0.14267733693122864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,1,float16,float16,0,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,1,float16,fp8,0,0.13738666971524557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,1,fp8,fp8,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,1,float16,float16,0,0.1418346663316091
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,1,float16,fp8,0,0.14011733730634054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,1,float16,float16,0,0.14404267072677612
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,1,float16,fp8,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,1,fp8,fp8,0,0.13806933164596558
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,1,float16,float16,0,0.09078933795293172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,1,fp8,fp8,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,1,float16,float16,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,1,float16,fp8,0,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,1,float16,float16,0,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,1,float16,fp8,0,0.09181867043177287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,1,fp8,fp8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,1,float16,float16,0,0.09250666697820027
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,1,float16,fp8,0,0.09113599856694539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,1,float16,float16,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,1,fp8,fp8,0,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,1,float16,fp8,0,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,1,float16,float16,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,1,float16,float16,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,1,float16,float16,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,1,float16,float16,0,0.9586346944173177
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,1,float16,fp8,0,0.951807975769043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,1,float16,float16,0,0.3643733263015747
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,1,fp8,fp8,0,0.9306453069051107
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,1,float16,float16,0,0.9647680123647054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,1,float16,fp8,0,0.9685333569844564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,1,fp8,fp8,0,0.9432746569315592
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,1,float16,float16,0,0.9787840048472086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,1,float16,fp8,0,0.9801386992136637
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,1,fp8,fp8,0,0.9644373257954916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,1,float16,float16,0,0.532480001449585
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,1,float16,fp8,0,0.5341813166936239
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,1,fp8,fp8,0,0.5220693349838257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,1,float16,float16,0,0.4882826805114746
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,1,float16,fp8,0,0.4882880051930745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,1,fp8,fp8,0,0.4766613245010376
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,1,float16,float16,0,0.49544533093770343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,1,float16,fp8,0,0.4968106746673584
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,1,fp8,fp8,0,0.48349865277608234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,1,float16,float16,0,0.5053439935048422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,1,float16,fp8,0,0.5090986490249634
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,1,fp8,fp8,0,0.49715733528137207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,1,float16,float16,0,0.2831413348515828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,1,float16,fp8,0,0.2851840058962504
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,1,fp8,fp8,0,0.2773333390553792
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,1,float16,float16,0,0.2573759953180949
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,1,float16,fp8,0,0.2604373296101888
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,1,fp8,fp8,0,0.2532693346341451
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,1,float16,float16,0,0.26436267296473187
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,1,float16,fp8,0,0.26419200499852497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,1,fp8,fp8,0,0.2573653260866801
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,1,float16,float16,0,0.27050666014353436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,1,float16,fp8,0,0.27324267228444415
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,1,fp8,fp8,0,0.2650453249613444
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,1,float16,float16,0,0.15718400478363037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,1,float16,fp8,0,0.15889066457748413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,1,fp8,fp8,0,0.15546666582425436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,1,float16,float16,0,0.13858133554458618
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,1,float16,fp8,0,0.14079999923706055
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,1,fp8,fp8,0,0.1365333298842112
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,1,float16,float16,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,1,float16,fp8,0,0.1437013347943624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,1,fp8,fp8,0,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,1,float16,float16,0,0.14882133404413858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,1,float16,fp8,0,0.14865066607793173
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,1,float16,float16,0,0.0890826682249705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,1,fp8,fp8,0,0.1479680041472117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,1,float16,fp8,0,0.09181867043177287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,1,fp8,fp8,0,0.09113599856694539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,1,float16,fp8,0,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,1,fp8,fp8,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,1,float16,float16,0,0.08567999800046285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,1,float16,fp8,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,1,fp8,fp8,0,0.08294400076071422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,1,fp8,fp8,0,0.13106133540471396
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,1,float16,float16,0,0.08704533179601033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,1,float16,fp8,0,0.0867039958635966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,1,float16,float16,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,1,float16,fp8,0,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,1,float16,float16,0,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,1,float16,fp8,0,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,1,fp8,fp8,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,1,float16,float16,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,1,float16,fp8,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,1,float16,fp8,0,0.05161066850026449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,1,float16,fp8,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,1,float16,float16,0,0.6843732992808024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,1,float16,fp8,0,0.6823253631591797
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,1,fp8,fp8,0,0.6782293319702148
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,1,float16,float16,0,0.6946132977803549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,1,float16,fp8,0,0.6935893694559733
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,1,fp8,fp8,0,0.6884693304697672
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,1,float16,float16,0,0.7014400164286295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,1,float16,fp8,0,0.7038293679555258
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,1,fp8,fp8,0,0.6993920008341471
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,1,float16,float16,0,0.3831520080566406
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,1,float16,fp8,0,0.38792534669240314
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,1,fp8,fp8,0,0.38690133889516193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,1,float16,float16,0,0.35072533289591473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,1,float16,fp8,0,0.35277867317199707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,1,fp8,fp8,0,0.3510613441467285
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,1,float16,float16,0,0.355840007464091
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,1,float16,fp8,0,0.3592533270517985
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,1,fp8,fp8,0,0.3582293192545573
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,1,float16,float16,0,0.36232535044352215
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,1,float16,fp8,0,0.3660800059636434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,1,fp8,fp8,0,0.36403199036916095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,1,float16,float16,0,0.2071733276049296
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,1,float16,fp8,0,0.20992000897725424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,1,fp8,fp8,0,0.20856000979741415
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,1,float16,float16,0,0.18551466862360635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,1,float16,fp8,0,0.18824533621470133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,1,fp8,fp8,0,0.18858667214711508
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,1,float16,float16,0,0.19029333194096884
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,1,float16,fp8,0,0.19234132766723633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,1,fp8,fp8,0,0.19319466749827066
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,1,float16,float16,0,0.19593065977096558
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,1,float16,fp8,0,0.1996799906094869
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,1,fp8,fp8,0,0.1971199909845988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,1,float16,float16,0,0.1181013286113739
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,1,float16,fp8,0,0.11878400047620137
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,1,fp8,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,1,float16,float16,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,1,float16,fp8,0,0.10547199845314026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,1,fp8,fp8,0,0.10376532872517903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,1,float16,float16,0,0.10615467031796773
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,1,float16,fp8,0,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,1,fp8,fp8,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,1,float16,float16,0,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,1,float16,float16,0,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,1,fp8,fp8,0,0.11025066177050273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,1,float16,float16,0,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,1,float16,fp8,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,1,fp8,fp8,0,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,1,float16,float16,0,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,1,float16,fp8,0,0.0699786643187205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,1,fp8,fp8,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,1,float16,float16,0,0.06894400219122569
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,1,float16,fp8,0,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,1,fp8,fp8,0,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,1,float16,fp8,0,0.04950400193532308
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,1,float16,float16,0,0.048138668139775596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,1,fp8,fp8,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,1,float16,float16,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,1,float16,fp8,0,0.0481333335240682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,1,float16,float16,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,1,fp8,fp8,0,0.04124266654253006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,1,float16,fp8,0,0.04200000067551931
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,1,float16,float16,0,0.043338666359583534
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,1,float16,fp8,0,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,1,float16,float16,0,0.7297759850819906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,1,float16,fp8,0,0.7243093649546305
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,1,fp8,fp8,0,0.7563947041829427
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,1,float16,float16,0,0.7359093030293783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,1,float16,fp8,0,0.7321600119272867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,1,fp8,fp8,0,0.7693706353505453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,1,float16,float16,0,0.7581013043721517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,1,float16,fp8,0,0.7485439777374268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,1,fp8,fp8,0,0.846506675084432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,1,float16,float16,0,0.41096532344818115
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,1,float16,fp8,0,0.4037919839223226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,1,fp8,fp8,0,0.4275146722793579
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,1,float16,float16,0,0.37597866853078205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,1,float16,fp8,0,0.37324798107147217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,1,fp8,fp8,0,0.38997864723205566
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,1,float16,float16,0,0.38075733184814453
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,1,float16,fp8,0,0.3766560157140096
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,1,fp8,fp8,0,0.39561065038045246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,1,float16,float16,0,0.39202133814493817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,1,fp8,fp8,0,0.4208639860153198
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,1,float16,fp8,0,0.3858773310979207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,1,float16,float16,0,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,1,float16,float16,0,0.2194719910621643
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,1,float16,fp8,0,0.212991992632548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,1,float16,float16,0,0.19985065857569376
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,1,fp8,fp8,0,0.22595733404159546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,1,float16,fp8,0,0.19899733861287436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,1,fp8,fp8,0,0.20497065782546997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,1,float16,float16,0,0.20155733823776245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,1,float16,fp8,0,0.19985065857569376
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,1,fp8,fp8,0,0.2117919921875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,1,float16,float16,0,0.20676799615224203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,1,float16,fp8,0,0.20770132541656494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,1,fp8,fp8,0,0.21913067499796549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,1,float16,float16,0,0.12219199538230896
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,1,float16,fp8,0,0.11913599570592244
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,1,float16,float16,0,0.10820266604423523
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,1,fp8,fp8,0,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,1,float16,fp8,0,0.11058666308720906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,1,fp8,fp8,0,0.11059199770291646
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,1,float16,float16,0,0.11059733231862386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,1,float16,fp8,0,0.10956799983978271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,1,fp8,fp8,0,0.11639466881752014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,1,float16,float16,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,1,float16,fp8,0,0.11400533715883891
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,1,float16,float16,0,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,1,fp8,fp8,0,0.12185600399971008
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,1,float16,fp8,0,0.06723199784755707
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,1,fp8,fp8,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,1,float16,float16,0,0.06525866687297821
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,1,float16,fp8,0,0.06348266700903575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,1,fp8,fp8,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,1,float16,float16,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,1,float16,fp8,0,0.06520533561706543
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,1,fp8,fp8,0,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,1,float16,float16,0,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,1,float16,fp8,0,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,1,float16,fp8,0,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,1,float16,float16,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,1,fp8,fp8,0,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,1,float16,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,1,float16,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,1,fp8,fp8,0,0.03528533379236857
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,1,float16,float16,0,0.033786666889985405
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,1,float16,fp8,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,1,float16,float16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,1,float16,float16,0,0.6340266863505045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,1,float16,fp8,0,0.6336853504180908
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,1,fp8,fp8,0,0.679253339767456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,1,float16,float16,0,0.6493866840998331
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,1,float16,fp8,0,0.6384640137354533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,1,fp8,fp8,0,0.6980266571044922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,1,float16,float16,0,0.6714026927947998
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,1,float16,fp8,0,0.6614986658096313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,1,float16,float16,0,0.3616480032602946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,1,fp8,fp8,0,0.771071990331014
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,1,float16,fp8,0,0.3561760187149048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,1,fp8,fp8,0,0.3906613190968831
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,1,float16,float16,0,0.3285333315531413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,1,float16,fp8,0,0.3251146674156189
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,1,fp8,fp8,0,0.3500373363494873
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,1,float16,float16,0,0.33297065893809
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,1,float16,fp8,0,0.33126399914423627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,1,fp8,fp8,0,0.35788265864054364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,1,float16,float16,0,0.34627731641133624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,1,float16,fp8,0,0.3418453137079875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,1,fp8,fp8,0,0.3872426748275757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,1,float16,float16,0,0.19268266359965006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,1,float16,fp8,0,0.19012266397476196
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,1,fp8,fp8,0,0.2053119937578837
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,1,float16,float16,0,0.17510400215784708
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,1,float16,fp8,0,0.17322667439778647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,1,fp8,fp8,0,0.1858560045560201
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,1,float16,float16,0,0.17800533771514893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,1,float16,fp8,0,0.17698132991790771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,1,fp8,fp8,0,0.18807466824849448
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,1,float16,float16,0,0.18397865692774454
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,1,float16,fp8,0,0.18073066075642905
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,1,float16,float16,0,0.10683199763298035
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,1,fp8,fp8,0,0.19950934251149496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,1,float16,fp8,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,1,fp8,fp8,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,1,float16,fp8,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,1,fp8,fp8,0,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,1,float16,float16,0,0.09829866886138916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,1,float16,fp8,0,0.09795733292897542
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,1,fp8,fp8,0,0.10444800059000652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,1,float16,float16,0,0.10241066416104634
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,1,float16,fp8,0,0.10205866893132527
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,1,float16,float16,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,1,fp8,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,1,fp8,fp8,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,1,float16,float16,0,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,1,float16,float16,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,1,fp8,fp8,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,1,float16,float16,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,1,float16,fp8,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,1,float16,float16,0,0.0365226666132609
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,1,fp8,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,1,fp8,fp8,0,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,1,float16,fp8,0,0.03514133393764496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,1,float16,float16,0,0.026975999275843304
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,1,float16,fp8,0,0.028677334388097126
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,1,float16,float16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,1,float16,fp8,0,0.024858665963013966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,1,float16,float16,0,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,1,float16,fp8,0,0.022789334257443745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,1,float16,float16,0,0.022805333137512207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,1,float16,float16,0,0.024501333634058636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,1,float16,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,1,float16,float16,0,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,1,float16,fp8,0,0.23995733261108398
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,1,fp8,fp8,0,0.2621440092722575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,1,float16,float16,0,0.24951465924580893
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,1,float16,fp8,0,0.24541866779327393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,1,fp8,fp8,0,0.27153066794077557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,1,float16,float16,0,0.2698240081469218
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,1,float16,fp8,0,0.2635093331336975
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,1,float16,float16,0,0.15718400478363037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,1,fp8,fp8,0,0.2892799973487854
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,1,float16,fp8,0,0.15308266878128052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,1,fp8,fp8,0,0.1629866659641266
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,1,float16,float16,0,0.13107200463612875
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,1,float16,fp8,0,0.12970667084058127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,1,fp8,fp8,0,0.14165332913398743
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,1,float16,float16,0,0.13823999961217245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,1,float16,fp8,0,0.13396799564361572
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,1,fp8,fp8,0,0.14387733737627664
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,1,float16,float16,0,0.14591466387112936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,1,float16,fp8,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,1,float16,float16,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,1,fp8,fp8,0,0.15547733505566916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,1,float16,fp8,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,1,fp8,fp8,0,0.0942080020904541
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,1,float16,float16,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,1,float16,fp8,0,0.07611200213432312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,1,fp8,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,1,float16,fp8,0,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,1,fp8,fp8,0,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,1,float16,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,1,fp8,fp8,0,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,1,fp8,fp8,0,0.04574933151404063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,1,float16,float16,0,0.048138668139775596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,1,float16,fp8,0,0.032431999842325844
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,1,fp8,fp8,0,0.02183466653029124
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,1,float16,float16,0,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,1,fp8,fp8,0,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,1,float16,fp8,0,0.020746666938066483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,1,float16,float16,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,1,float16,float16,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,1,fp8,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,1,float16,fp8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,1,float16,float16,0,0.1384106675783793
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,1,float16,fp8,0,0.13739200433095297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,1,fp8,fp8,0,0.14658666650454202
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,1,float16,float16,0,0.1397706667582194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,1,float16,fp8,0,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,1,fp8,fp8,0,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,1,float16,float16,0,0.1469493309656779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,1,float16,fp8,0,0.1454080045223236
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,1,fp8,fp8,0,0.15956800182660422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,1,float16,float16,0,0.0897759993871053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,1,float16,fp8,0,0.08636266986529033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,1,fp8,fp8,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,1,float16,float16,0,0.07782933115959167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,1,fp8,fp8,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,1,float16,fp8,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,1,fp8,fp8,0,0.08533333738644917
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,1,fp8,fp8,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,1,float16,float16,0,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,1,fp8,fp8,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,1,fp8,fp8,0,0.04881600042184194
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,1,float16,fp8,0,0.048138668139775596
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,1,float16,float16,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,1,float16,fp8,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,1,float16,float16,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,1,float16,float16,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,1,float16,float16,0,0.022805333137512207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,1,fp8,fp8,0,0.022800001005331676
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,1,float16,float16,0,0.01979200045267741
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,1,float16,float16,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,1,float16,fp8,0,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,1,float16,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,1,float16,float16,0,0.017978666971127193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,1,float16,float16,0,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,1,float16,fp8,0,0.018613333503405254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,1,float16,fp8,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,1,fp8,fp8,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,1,float16,float16,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,1,float16,fp8,0,0.09795733292897542
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,1,fp8,fp8,0,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,1,float16,float16,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,1,float16,fp8,0,0.10275200009346008
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,1,fp8,fp8,0,0.1160533328851064
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,1,fp8,fp8,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,1,fp8,fp8,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,1,float16,fp8,0,0.05871466795603434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,1,fp8,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,1,float16,float16,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,1,float16,float16,0,0.025589334468046825
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,1,fp8,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,1,float16,float16,0,0.02628266563018163
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,1,float16,float16,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,1,float16,fp8,0,0.020495999604463577
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,1,float16,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,1,float16,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,1,float16,float16,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,1,float16,float16,0,0.08225066463152568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,1,fp8,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,1,fp8,fp8,0,0.052576000491778054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,1,float16,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,1,float16,float16,0,0.03242133309443792
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,1,float16,fp8,0,0.024458666642506916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,1,float16,float16,0,0.02277333289384842
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,1,fp8,fp8,0,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,1,float16,float16,0,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,1,float16,fp8,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,1,fp8,fp8,0,0.018725333114465077
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,1,float16,fp8,0,0.018672000616788864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,1,float16,fp8,0,0.017653333644072216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,1,float16,fp8,0,0.017637333522240322
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,1,float16,float16,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,1,fp8,fp8,0,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,1,float16,float16,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,1,float16,float16,0,0.016789333273967106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,1,float16,float16,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,1,float16,float16,0,0.016607999801635742
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,1,fp8,fp8,0,0.016575999557971954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,1,float16,fp8,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,1,float16,float16,0,0.07406400144100189
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,1,fp8,fp8,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,1,float16,float16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,1,fp8,fp8,0,0.0788373351097107
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,1,float16,float16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,1,float16,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,1,float16,float16,0,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,1,float16,float16,0,0.043338666359583534
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,1,float16,fp8,0,0.04367466767628988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,1,fp8,fp8,0,0.048800001541773476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,1,float16,float16,0,0.043696001172065735
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,1,float16,float16,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,1,float16,fp8,0,0.029690665503342945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,1,float16,float16,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,1,float16,float16,0,0.030389333764712017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,1,float16,float16,0,0.01865600049495697
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,1,float16,float16,0,0.016682667036851246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,1,fp8,fp8,0,0.01836266616980235
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,1,float16,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,1,float16,float16,0,0.016575999557971954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,1,float16,fp8,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,1,float16,float16,0,0.01646399994691213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,1,float16,float16,0,0.016575999557971954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,1,fp8,fp8,0,0.01646399994691213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,1,float16,float16,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,1,fp8,fp8,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,1,float16,float16,0,1.7382399241129558
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,1,float16,fp8,0,1.7771466573079426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,1,fp8,fp8,0,1.493674596150716
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,1,float16,float16,0,1.7624746958414714
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,1,float16,fp8,0,1.7314133644104004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,1,fp8,fp8,0,1.508687973022461
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,1,float16,float16,0,0.9838293393452963
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,1,float16,fp8,0,0.9828693072001139
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,1,fp8,fp8,0,0.8394986788431803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,1,float16,float16,0,0.9647786617279053
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,1,float16,fp8,0,0.9586293697357178
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,1,fp8,fp8,0,0.8180053234100342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,1,float16,float16,0,0.9722879727681478
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,1,float16,fp8,0,0.9681920210520426
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,1,fp8,fp8,0,0.8255093097686768
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,1,float16,float16,0,0.5669493277867635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,1,float16,fp8,0,0.5642293294270834
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,1,fp8,fp8,0,0.49168535073598224
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,1,float16,float16,0,0.5574026505152384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,1,float16,fp8,0,0.5488693316777548
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,1,fp8,fp8,0,0.4790666500727336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,1,fp8,fp8,0,0.48554666837056476
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,1,float16,fp8,0,0.5546666781107584
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,1,float16,float16,0,0.5635413328806559
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,1,fp8,fp8,0,0.3189706603686015
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,1,float16,float16,0,0.3538026809692383
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,1,float16,float16,0,0.35447466373443604
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,1,float16,fp8,0,0.3490133285522461
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,1,fp8,fp8,0,0.31487999359766644
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,1,float16,float16,0,0.35073598225911456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,1,float16,fp8,0,0.35653332869211835
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,1,fp8,fp8,0,0.3131733338038127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,1,float16,float16,0,1.086634635925293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,1,float16,fp8,0,1.0664959748586018
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,1,fp8,fp8,0,0.9279146989186605
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,1,float16,float16,0,1.0371413230895996
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,1,float16,fp8,0,1.0825386842091878
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,1,fp8,fp8,0,0.9330346584320068
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,1,float16,float16,0,0.6150933504104614
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,1,float16,fp8,0,0.6068960030873617
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,1,float16,float16,0,0.589141329129537
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,1,fp8,fp8,0,0.5352053244908651
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,1,float16,fp8,0,0.5887999931971232
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,1,fp8,fp8,0,0.5193386475245158
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,1,float16,float16,0,0.5949333508809408
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,1,float16,fp8,0,0.6075733502705892
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,1,float16,float16,0,0.3643733263015747
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,1,fp8,fp8,0,0.521727999051412
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,1,float16,fp8,0,0.36505599816640216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,1,fp8,fp8,0,0.3217066725095113
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,1,float16,float16,0,0.35310932000478107
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,1,float16,fp8,0,0.34867199261983234
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,1,fp8,fp8,0,0.3104426662127177
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,1,float16,float16,0,0.35276798407236737
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,1,float16,fp8,0,0.3568640152613322
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,1,fp8,fp8,0,0.3141973416010539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,1,float16,float16,0,0.23278933763504028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,1,float16,fp8,0,0.23176000515619913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,1,float16,float16,0,0.22801067431767783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,1,fp8,fp8,0,0.21026132504145303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,1,float16,fp8,0,0.22868800163269043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,1,fp8,fp8,0,0.20804266134897867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,1,float16,float16,0,0.2290346622467041
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,1,float16,fp8,0,0.23278933763504028
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,1,fp8,fp8,0,0.20736000935236612
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,1,float16,float16,0,0.8016213575998942
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,1,float16,fp8,0,0.7867733637491862
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,1,fp8,fp8,0,0.6901760101318359
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,1,float16,float16,0,0.7947946389516195
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,1,float16,fp8,0,0.8104960123697916
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,1,fp8,fp8,0,0.6946132977803549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,1,float16,float16,0,0.4626773198445638
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,1,float16,fp8,0,0.4582293430964152
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,1,fp8,fp8,0,0.40618666013081867
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,1,float16,float16,0,0.44048531850179035
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,1,float16,fp8,0,0.4466346502304077
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,1,fp8,fp8,0,0.3906559944152832
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,1,float16,float16,0,0.4538026650746663
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,1,float16,fp8,0,0.4490293264389038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,1,fp8,fp8,0,0.39662933349609375
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,1,float16,float16,0,0.2786933382352193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,1,float16,fp8,0,0.2834773262341817
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,1,fp8,fp8,0,0.25224532683690387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,1,float16,float16,0,0.2739199995994568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,1,float16,fp8,0,0.27153066794077557
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,1,fp8,fp8,0,0.24302933613459268
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,1,float16,float16,0,0.27187200387318927
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,1,float16,fp8,0,0.2773333390553792
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,1,fp8,fp8,0,0.24541866779327393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,1,float16,float16,0,0.17322667439778647
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,1,float16,fp8,0,0.17425066232681274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,1,float16,float16,0,0.17083734273910522
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,1,fp8,fp8,0,0.16025599837303162
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,1,float16,fp8,0,0.1723733345667521
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,1,fp8,fp8,0,0.15889066457748413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,1,float16,float16,0,0.1716960072517395
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,1,float16,fp8,0,0.1728853384653727
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,1,fp8,fp8,0,0.15889066457748413
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,1,float16,float16,0,1.0193920135498047
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,1,float16,fp8,0,0.9849119981129965
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,1,fp8,fp8,0,0.8878080050150553
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,1,float16,float16,0,1.042944033940633
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,1,float16,fp8,0,0.9982240200042725
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,1,fp8,fp8,0,0.9014560381571451
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,1,float16,float16,0,0.5744640032450358
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,1,float16,fp8,0,0.5737813313802084
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,1,float16,float16,0,0.5570559899012247
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,1,float16,fp8,0,0.5495466788609823
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,1,fp8,fp8,0,0.5073920090993246
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,1,fp8,fp8,0,0.48793598016103107
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,1,float16,float16,0,0.5536426703135172
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,1,float16,fp8,0,0.5666133165359497
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,1,fp8,fp8,0,0.4944159984588623
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,1,float16,float16,0,0.3271733323733012
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,1,float16,fp8,0,0.33433600266774494
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,1,float16,float16,0,0.31214932600657147
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,1,float16,fp8,0,0.3179519971211751
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,1,fp8,fp8,0,0.2957599957784017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,1,fp8,fp8,0,0.28074665864308673
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,1,float16,float16,0,0.32341333230336505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,1,float16,fp8,0,0.3240906596183777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,1,fp8,fp8,0,0.2868906656901042
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,1,float16,float16,0,0.20514132579167685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,1,float16,fp8,0,0.20974934101104736
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,1,float16,float16,0,0.20360533396402994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,1,fp8,fp8,0,0.18653333187103271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,1,float16,fp8,0,0.20497065782546997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,1,fp8,fp8,0,0.1807360053062439
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,1,float16,float16,0,0.20139199495315552
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,1,float16,fp8,0,0.2063360015551249
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,1,float16,float16,0,0.13141333063443503
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,1,fp8,fp8,0,0.1824480096499125
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,1,float16,fp8,0,0.13397333025932312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,1,float16,fp8,0,0.13328533371289572
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,1,fp8,fp8,0,0.12527466813723245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,1,fp8,fp8,0,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,1,float16,float16,0,0.13140267133712769
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,1,float16,fp8,0,0.13277332981427512
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,1,fp8,fp8,0,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,1,float16,float16,0,0.6534773508707682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,1,float16,fp8,0,0.644597331682841
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,1,fp8,fp8,0,0.575823982556661
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,1,float16,float16,0,0.652458667755127
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,1,float16,fp8,0,0.6652213335037231
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,1,fp8,fp8,0,0.5806080102920532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,1,float16,float16,0,0.37461332480112713
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,1,float16,fp8,0,0.37426666418711346
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,1,float16,float16,0,0.35447998841603595
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,1,fp8,fp8,0,0.3377546469370524
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,1,float16,fp8,0,0.35788798332214355
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,1,float16,fp8,0,0.35583468278249103
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,1,fp8,fp8,0,0.3203413287798564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,1,float16,float16,0,0.36403199036916095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,1,float16,fp8,0,0.36266668637593585
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,1,fp8,fp8,0,0.3258026639620463
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,1,float16,float16,0,0.21743466456731161
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,1,float16,fp8,0,0.21845332781473795
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,1,fp8,fp8,0,0.20104533433914185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,1,float16,float16,0,0.20616533358891806
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,1,float16,fp8,0,0.2070186734199524
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,1,fp8,fp8,0,0.18756266434987387
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,1,float16,float16,0,0.20906132459640503
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,1,float16,fp8,0,0.20906666914621988
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,1,fp8,fp8,0,0.1919999917348226
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,1,float16,float16,0,0.13482667009035745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,1,float16,fp8,0,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,1,fp8,fp8,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,1,float16,float16,0,0.13312000036239624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,1,float16,fp8,0,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,1,fp8,fp8,0,0.1232266624768575
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,1,float16,float16,0,0.13329066832860312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,1,float16,fp8,0,0.1346560021241506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,1,float16,float16,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,1,float16,fp8,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,1,float16,float16,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,1,fp8,fp8,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,1,float16,fp8,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,1,fp8,fp8,0,0.09557333588600159
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,1,float16,float16,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,1,float16,fp8,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,1,fp8,fp8,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,1,float16,float16,0,0.6693546772003174
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,1,float16,fp8,0,0.6645706494649252
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,1,fp8,fp8,0,0.609279990196228
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,1,float16,float16,0,0.6761813163757324
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,1,float16,fp8,0,0.6860799789428711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,1,fp8,fp8,0,0.6184906562169393
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,1,float16,float16,0,0.3800746599833171
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,1,float16,fp8,0,0.3800746599833171
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,1,fp8,fp8,0,0.3490133285522461
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,1,float16,float16,0,0.3568640152613322
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,1,float16,fp8,0,0.36164267857869464
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,1,fp8,fp8,0,0.3264799912770589
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,1,float16,float16,0,0.368127981821696
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,1,float16,fp8,0,0.3660800059636434
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,1,float16,float16,0,0.21504000822703043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,1,fp8,fp8,0,0.3357013463973999
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,1,float16,fp8,0,0.2177706758181254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,1,fp8,fp8,0,0.20002132654190063
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,1,float16,float16,0,0.2001919945081075
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,1,float16,fp8,0,0.20206934213638306
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,1,fp8,fp8,0,0.18517333269119263
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,1,float16,float16,0,0.20668266216913858
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,1,float16,fp8,0,0.20565332969029745
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,1,fp8,fp8,0,0.1914880077044169
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,1,float16,float16,0,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,1,float16,fp8,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,1,float16,float16,0,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,1,fp8,fp8,0,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,1,float16,fp8,0,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,1,fp8,fp8,0,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,1,float16,float16,0,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,1,float16,fp8,0,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,1,fp8,fp8,0,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,1,float16,fp8,0,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,1,fp8,fp8,0,0.07715733349323273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,1,fp8,fp8,0,0.07612266639868419
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,1,float16,float16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,1,float16,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,1,float16,float16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,1,fp8,fp8,0,0.07066666583220164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,1,float16,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,1,float16,float16,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,1,float16,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,1,float16,float16,0,0.4490293264389038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,1,float16,fp8,0,0.4541440010070801
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,1,fp8,fp8,0,0.42239999771118164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,1,float16,float16,0,0.463701327641805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,1,float16,fp8,0,0.45892266432444256
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,1,fp8,fp8,0,0.4319466749827067
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,1,float16,float16,0,0.25975465774536133
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,1,float16,fp8,0,0.2640213370323181
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,1,fp8,fp8,0,0.24780799945195517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,1,float16,float16,0,0.24644267559051514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,1,float16,fp8,0,0.244053324063619
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,1,fp8,fp8,0,0.2290346622467041
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,1,float16,float16,0,0.2501973311106364
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,1,float16,fp8,0,0.25190399090449017
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,1,float16,float16,0,0.1479680041472117
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,1,fp8,fp8,0,0.23483733336130777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,1,float16,fp8,0,0.14983466267585754
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,1,fp8,fp8,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,1,float16,float16,0,0.13636266191800436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,1,float16,fp8,0,0.13636266191800436
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,1,fp8,fp8,0,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,1,float16,float16,0,0.14046399792035422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,1,float16,fp8,0,0.14028799533843994
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,1,fp8,fp8,0,0.13346133629480997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,1,float16,float16,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,1,float16,fp8,0,0.0897706647713979
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,1,float16,float16,0,0.08567466338475545
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,1,float16,fp8,0,0.08533866206804912
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,1,float16,float16,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,1,float16,fp8,0,0.08738133311271667
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,1,float16,float16,0,0.13141866525014242
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,1,fp8,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,1,float16,float16,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,1,float16,fp8,0,0.06586666901906331
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,1,fp8,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,1,fp8,fp8,0,0.060421332716941833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,1,float16,float16,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,1,float16,float16,0,0.5273600021998087
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,1,float16,fp8,0,0.5341866811116537
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,1,fp8,fp8,0,0.513861338297526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,1,float16,float16,0,0.5386240084966024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,1,float16,fp8,0,0.5389653444290161
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,1,fp8,fp8,0,0.5213866631189982
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,1,float16,float16,0,0.29815467198689777
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,1,float16,fp8,0,0.3011946678161621
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,1,fp8,fp8,0,0.28893866141637164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,1,float16,float16,0,0.2752853234608968
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,1,float16,fp8,0,0.27665066719055176
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,1,fp8,fp8,0,0.2681173284848531
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,1,float16,float16,0,0.28040534257888794
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,1,float16,fp8,0,0.2845013340314229
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,1,fp8,fp8,0,0.27357866366704303
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,1,float16,float16,0,0.1623146633307139
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,1,fp8,fp8,0,0.15786666671435037
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,1,float16,fp8,0,0.16230400403340658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,1,float16,float16,0,0.14404799540837607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,1,float16,fp8,0,0.1418239971001943
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,1,fp8,fp8,0,0.14455466469128928
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,1,float16,float16,0,0.1544533371925354
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,1,float16,fp8,0,0.15240533153216043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,1,fp8,fp8,0,0.15035733580589294
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,1,fp8,fp8,0,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,1,float16,float16,0,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,1,float16,fp8,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,1,fp8,fp8,0,0.09010666608810425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,1,float16,float16,0,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,1,float16,fp8,0,0.08498666683832805
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,1,fp8,fp8,0,0.08293866614500682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,1,float16,float16,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,1,float16,fp8,0,0.09010666608810425
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,1,fp8,fp8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,1,float16,float16,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,1,float16,fp8,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,1,float16,float16,0,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,1,float16,fp8,0,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,1,float16,float16,0,0.05563200016816457
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,1,float16,fp8,0,0.0481333335240682
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,1,fp8,fp8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,1,fp8,fp8,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,1,float16,float16,0,0.3633386691411336
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,1,float16,fp8,0,0.36300798257191974
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,1,fp8,fp8,0,0.3619893391927083
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,1,float16,float16,0,0.37255998452504474
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,1,float16,float16,0,0.2097546656926473
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,1,float16,fp8,0,0.3691519896189372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,1,fp8,fp8,0,0.36949865023295086
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,1,float16,fp8,0,0.2146986722946167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,1,float16,float16,0,0.19387733936309814
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,1,float16,fp8,0,0.1943946679433187
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,1,fp8,fp8,0,0.19660800695419312
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,1,fp8,fp8,0,0.21094399690628052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,1,float16,float16,0,0.19882667064666748
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,1,float16,fp8,0,0.20104533433914185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,1,float16,float16,0,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,1,fp8,fp8,0,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,1,float16,float16,0,0.10547199845314026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,1,float16,fp8,0,0.10717333356539409
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,1,float16,fp8,0,0.11980799833933513
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,1,fp8,fp8,0,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,1,fp8,fp8,0,0.10684266686439514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,1,float16,float16,0,0.10956799983978271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,1,float16,fp8,0,0.11162133018175761
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,1,fp8,fp8,0,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,1,float16,float16,0,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,1,float16,fp8,0,0.07167999943097432
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,1,float16,float16,0,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,1,fp8,fp8,0,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,1,float16,fp8,0,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,1,fp8,fp8,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,1,float16,float16,0,0.06758933266003926
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,1,float16,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,1,fp8,fp8,0,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,1,float16,float16,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,1,float16,float16,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,1,float16,fp8,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,1,float16,float16,0,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,1,float16,fp8,0,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,1,fp8,fp8,0,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,1,float16,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,1,float16,float16,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,1,float16,fp8,0,0.03915733347336451
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,1,float16,float16,0,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,1,float16,fp8,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,1,float16,float16,0,0.387231985727946
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,1,float16,fp8,0,0.3858773310979207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,1,fp8,fp8,0,0.4034613370895386
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,1,float16,float16,0,0.39372801780700684
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,1,float16,float16,0,0.2177706758181254
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,1,fp8,fp8,0,0.41437331835428876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,1,float16,fp8,0,0.39031465848286945
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,1,float16,fp8,0,0.21435733636220297
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,1,float16,float16,0,0.20240533351898193
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,1,float16,fp8,0,0.20274666945139566
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,1,fp8,fp8,0,0.21059733629226685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,1,fp8,fp8,0,0.22562134265899658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,1,float16,fp8,0,0.2065066695213318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,1,float16,float16,0,0.20992533365885416
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,1,float16,float16,0,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,1,fp8,fp8,0,0.21640533208847046
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,1,float16,float16,0,0.11162133018175761
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,1,float16,fp8,0,0.10956799983978271
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,1,float16,fp8,0,0.11981866757074992
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,1,fp8,fp8,0,0.12458667159080505
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,1,fp8,fp8,0,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,1,float16,float16,0,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,1,float16,fp8,0,0.11229333281517029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,1,fp8,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,1,float16,float16,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,1,float16,fp8,0,0.06791466474533081
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,1,float16,float16,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,1,fp8,fp8,0,0.0699786643187205
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,1,float16,fp8,0,0.06554133196671803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,1,fp8,fp8,0,0.0645066648721695
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,1,float16,float16,0,0.06485866506894429
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,1,float16,fp8,0,0.06554133196671803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,1,fp8,fp8,0,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,1,float16,float16,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,1,float16,fp8,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,1,fp8,fp8,0,0.04138666639725367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,1,float16,fp8,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,1,fp8,fp8,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,1,float16,fp8,0,0.03482133398453394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,1,fp8,fp8,0,0.03482133398453394
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,1,float16,fp8,0,0.034485332667827606
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,1,float16,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,1,float16,float16,0,0.33501867453257245
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,1,float16,fp8,0,0.3298986752827962
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,1,fp8,fp8,0,0.36164267857869464
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,1,float16,float16,0,0.34696535269419354
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,1,float16,fp8,0,0.3384373188018799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,1,fp8,fp8,0,0.37085866928100586
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,1,float16,float16,0,0.19268266359965006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,1,float16,fp8,0,0.1890986760457357
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,1,fp8,fp8,0,0.20359466473261514
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,1,float16,float16,0,0.17629865805308023
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,1,float16,fp8,0,0.17356799046198526
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,1,fp8,fp8,0,0.1889280080795288
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,1,float16,float16,0,0.18075199921925864
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,1,float16,fp8,0,0.1787253419558207
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,1,fp8,fp8,0,0.19404800732930502
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,1,float16,float16,0,0.10785599549611409
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,1,float16,fp8,0,0.10547733306884766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,1,fp8,fp8,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,1,float16,float16,0,0.09660266836484273
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,1,float16,fp8,0,0.0962506632010142
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,1,fp8,fp8,0,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,1,fp8,fp8,0,0.106495996316274
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,1,float16,float16,0,0.057349334160486855
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,1,fp8,fp8,0,0.062458669145902
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,1,float16,float16,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,1,fp8,fp8,0,0.05666666726271311
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,1,float16,float16,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,1,fp8,fp8,0,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,1,fp8,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,1,float16,float16,0,0.026949333647886913
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,1,float16,float16,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,1,float16,fp8,0,0.024906667570273083
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,1,float16,float16,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,1,float16,float16,0,0.02314666658639908
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,1,float16,float16,0,0.1365333298842112
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,1,float16,fp8,0,0.13772799571355185
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,1,fp8,fp8,0,0.1469439963499705
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,1,fp8,fp8,0,0.15172266960144043
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,1,float16,float16,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,1,float16,fp8,0,0.08636266986529033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,1,float16,float16,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,1,fp8,fp8,0,0.09318932890892029
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,1,float16,float16,0,0.07748800019423167
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,1,float16,float16,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,1,float16,fp8,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,1,fp8,fp8,0,0.08636266986529033
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,1,float16,fp8,0,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,1,fp8,fp8,0,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,1,float16,float16,0,0.04674666623274485
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,1,float16,fp8,0,0.045408000548680626
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,1,float16,float16,0,0.032442666590213776
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,1,float16,float16,0,0.024847999215126038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,1,float16,float16,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,1,float16,fp8,0,0.022175999979178112
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,1,fp8,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,1,float16,float16,0,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,1,fp8,fp8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,1,float16,fp8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,1,fp8,fp8,0,0.020746666938066483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,1,float16,float16,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,1,float16,fp8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,1,fp8,fp8,0,0.020768000433842342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,1,float16,float16,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,1,fp8,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,1,float16,float16,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,1,float16,fp8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,1,fp8,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,1,float16,float16,0,0.020768000433842342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,1,float16,float16,0,0.020655999581019085
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,1,float16,fp8,0,0.020746666938066483
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,1,fp8,fp8,0,0.020762667059898376
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,1,float16,float16,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,1,float16,fp8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,1,float16,fp8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,1,float16,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,1,fp8,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,1,float16,float16,0,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,1,fp8,fp8,0,0.08739200234413147
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,1,float16,fp8,0,0.0484799991051356
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,1,float16,float16,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,1,float16,fp8,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,1,fp8,fp8,0,0.031040000418821972
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,1,float16,fp8,0,0.032773333291212715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,1,float16,float16,0,0.018661333868900936
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,1,fp8,fp8,0,0.016645333419243496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,1,float16,fp8,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,1,float16,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,1,float16,float16,0,0.016544000556071598
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,1,fp8,fp8,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,1,float16,float16,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,1,float16,float16,0,0.03857066730658213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,1,float16,fp8,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,1,float16,fp8,0,0.02628266563018163
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,1,float16,float16,0,0.018954666952292126
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,1,float16,fp8,0,0.020725333442290623
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,1,fp8,fp8,0,0.02075733368595441
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,1,float16,float16,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,1,fp8,fp8,0,0.016544000556071598
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,1,float16,float16,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,1,fp8,fp8,0,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,1,float16,float16,0,0.03309333324432373
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,1,float16,float16,0,0.02314666658639908
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,1,float16,float16,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,1,float16,float16,0,0.018437333405017853
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,1,float16,fp8,0,0.01855466639002164
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,1,float16,float16,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,1,float16,fp8,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,1,fp8,fp8,0,0.01657066618402799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,1,fp8,fp8,0,0.016575999557971954
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,1,float16,float16,0,0.015967999895413715
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,1,float16,float16,0,0.04470400015513102
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,1,float16,float16,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,1,float16,fp8,0,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,1,float16,float16,0,0.029338667790095013
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,1,float16,fp8,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,1,float16,float16,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,1,float16,fp8,0,0.018751999984184902
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,1,fp8,fp8,0,0.018687999496857326
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,1,float16,fp8,0,0.018378666291634243
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,1,float16,fp8,0,0.01657066618402799
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,1,float16,float16,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,1,float16,float16,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,1,float16,fp8,0,0.016549333930015564
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,1,float16,float16,0,0.016677333662907284
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,1,float16,float16,0,0.01646399994691213
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,1,float16,fp8,0,0.01851733277241389
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,1,float16,float16,0,0.016458666572968166
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,1,float16,fp8,0,0.016688000410795212
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,1,float16,float16,0,0.01661866654952367
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,1,float16,fp8,0,0.016623999923467636
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,1,float16,float16,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,1,float16,float16,0,0.03105599929889043
