framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,96,2,128,1,fp8,fp8,0,72.96143086751302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,96,1,128,1,fp8,fp8,0,72.85207621256511
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,96,4,128,1,fp8,fp8,0,71.35386657714844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,128,1,float16,float16,0,49.97113545735677
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,128,1,float16,fp8,0,49.99133809407552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,96,8,128,1,fp8,fp8,0,72.05377197265625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,128,1,fp8,fp8,0,33.2641855875651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,96,128,1,float16,float16,0,50.226704915364586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,128,1,float16,float16,0,50.01563008626302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,96,128,1,float16,fp8,0,50.12334696451823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,128,1,float16,fp8,0,50.166534423828125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,128,1,fp8,fp8,0,33.21981302897135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,128,1,float16,float16,0,50.22521464029948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,128,1,float16,fp8,0,49.91356913248698
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,128,1,fp8,fp8,0,33.692891438802086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,128,1,float16,float16,0,50.1163584391276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,128,1,float16,fp8,0,50.135965983072914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,128,1,float16,float16,0,25.59479014078776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,128,1,fp8,fp8,0,33.221717834472656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,128,1,float16,fp8,0,25.425582885742188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,128,1,fp8,fp8,0,16.78338114420573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,128,1,float16,float16,0,25.693936665852863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,128,1,float16,fp8,0,25.28014882405599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,128,1,fp8,fp8,0,17.141050974527996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,128,1,float16,float16,0,25.8818359375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,128,1,fp8,fp8,0,16.740453084309895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,128,1,float16,fp8,0,25.163497924804688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,128,1,float16,float16,0,25.491844177246094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,128,1,float16,fp8,0,25.063364664713543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,128,1,fp8,fp8,0,17.061594645182293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,128,1,float16,float16,0,25.39344533284505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,128,1,float16,float16,0,12.897008260091146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,128,1,fp8,fp8,0,17.208805084228516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,128,1,float16,fp8,0,25.2684809366862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,128,1,float16,fp8,0,13.133920033772787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,128,1,fp8,fp8,0,8.835802714029947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,128,1,float16,float16,0,12.909322102864584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,128,1,float16,float16,0,12.946389516194662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,128,1,float16,fp8,0,12.901925404866537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,128,1,fp8,fp8,0,8.871327718098959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,128,1,fp8,fp8,0,8.729434967041016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,128,1,float16,fp8,0,13.146400451660156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,128,1,float16,float16,0,13.137045542399088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,128,1,float16,fp8,0,12.911226908365885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,128,1,fp8,fp8,0,8.692250569661459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,128,1,float16,float16,0,12.94100824991862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,128,1,float16,fp8,0,12.937488555908203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,128,1,fp8,fp8,0,8.69107755025228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,128,1,float16,float16,0,59.98489888509115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,128,1,fp8,fp8,0,39.139862060546875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,128,1,float16,fp8,0,62.62706502278646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,128,1,float16,float16,0,60.46369425455729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,128,1,float16,fp8,0,60.025594075520836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,128,1,float16,float16,0,60.09760538736979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,128,1,fp8,fp8,0,38.92243194580078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,128,1,float16,fp8,0,60.36670939127604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,128,1,float16,float16,0,29.1332270304362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,128,1,fp8,fp8,0,38.954121907552086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,128,1,float16,float16,0,62.22354634602865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,128,1,float16,fp8,0,61.75895690917969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,128,1,fp8,fp8,0,39.02717844645182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,128,1,float16,fp8,0,29.041765848795574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,128,1,float16,fp8,0,29.171658833821613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,128,1,fp8,fp8,0,19.659119923909504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,128,1,float16,float16,0,28.973988850911457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,128,1,float16,float16,0,29.419606526692707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,128,1,fp8,fp8,0,19.979573567708332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,128,1,float16,fp8,0,29.307637532552082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,128,1,fp8,fp8,0,19.832324981689453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,128,1,float16,float16,0,29.178250630696613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,128,1,float16,fp8,0,28.966959635416668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,128,1,fp8,fp8,0,19.62806447347005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,128,1,float16,fp8,0,29.424550374348957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,128,1,float16,float16,0,28.902089436848957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,128,1,fp8,fp8,0,19.71298090616862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,128,1,float16,float16,0,14.69919459025065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,128,1,float16,fp8,0,14.630650838216146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,128,1,fp8,fp8,0,10.01154645284017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,128,1,float16,fp8,0,14.725120544433594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,128,1,float16,float16,0,15.06924819946289
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,128,1,float16,float16,0,14.658485412597656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,128,1,fp8,fp8,0,9.903877258300781
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,128,1,fp8,fp8,0,10.209802627563477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,128,1,float16,fp8,0,14.57483164469401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,128,1,float16,float16,0,14.742293039957682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,128,1,float16,fp8,0,14.812335968017578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,128,1,fp8,fp8,0,9.957199732462565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,128,1,float16,float16,0,15.02029291788737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,128,1,float16,fp8,0,14.549631754557291
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,128,1,float16,float16,0,7.533498764038086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,128,1,float16,fp8,0,7.393861134847005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,128,1,fp8,fp8,0,9.944175720214844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,128,1,fp8,fp8,0,5.083210627237956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,128,1,float16,float16,0,7.484202702840169
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,128,1,float16,float16,0,7.6149546305338545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,128,1,float16,fp8,0,7.579914728800456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,128,1,float16,fp8,0,7.660495758056641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,128,1,fp8,fp8,0,5.294336001078288
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,128,1,fp8,fp8,0,5.061312039693196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,128,1,float16,float16,0,7.496026357014974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,128,1,float16,fp8,0,7.550048192342122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,128,1,fp8,fp8,0,5.085381189982097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,128,1,float16,float16,0,7.742122650146484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,128,1,fp8,fp8,0,5.215440114339192
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,128,1,float16,fp8,0,7.582090377807617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,128,1,fp8,fp8,0,27.924916585286457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,128,1,float16,fp8,0,40.59442647298177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,128,1,float16,float16,0,40.976539611816406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,128,1,fp8,fp8,0,27.98748779296875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,128,1,float16,float16,0,41.89214833577474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,128,1,float16,fp8,0,40.61248524983724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,128,1,float16,float16,0,41.30241139729818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,128,1,float16,fp8,0,40.739898681640625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,128,1,float16,float16,0,20.51263427734375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,128,1,fp8,fp8,0,27.914751688639324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,128,1,float16,float16,0,41.04710896809896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,128,1,float16,fp8,0,40.74628702799479
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,128,1,fp8,fp8,0,28.219622294108074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,128,1,float16,fp8,0,20.495450337727863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,128,1,fp8,fp8,0,13.96511967976888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,128,1,fp8,fp8,0,14.656176249186197
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,128,1,float16,fp8,0,20.406869252522785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,128,1,float16,float16,0,21.161717732747395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,128,1,float16,float16,0,20.56540298461914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,128,1,float16,fp8,0,20.40875244140625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,128,1,fp8,fp8,0,14.014373779296875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,128,1,float16,float16,0,20.607135772705078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,128,1,float16,fp8,0,20.589642842610676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,128,1,fp8,fp8,0,14.137327829996744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,128,1,float16,float16,0,20.39532216389974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,128,1,float16,fp8,0,20.92433039347331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,128,1,float16,float16,0,10.382975896199545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,128,1,fp8,fp8,0,14.138496398925781
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,128,1,float16,fp8,0,10.521957397460938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,128,1,fp8,fp8,0,7.091663996378581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,128,1,float16,float16,0,10.524208068847656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,128,1,float16,float16,0,10.624261220296225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,128,1,float16,fp8,0,10.44054921468099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,128,1,fp8,fp8,0,7.07421875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,128,1,float16,float16,0,10.749893188476562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,128,1,fp8,fp8,0,7.2157440185546875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,128,1,float16,fp8,0,10.617626825968424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,128,1,fp8,fp8,0,7.1014556884765625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,128,1,float16,fp8,0,10.398181279500326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,128,1,float16,float16,0,10.594778696695963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,128,1,float16,fp8,0,10.41049067179362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,128,1,float16,float16,0,5.390746434529622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,128,1,float16,fp8,0,5.118277231852214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,128,1,fp8,fp8,0,7.126506805419922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,128,1,fp8,fp8,0,3.730016072591146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,128,1,float16,float16,0,5.4050242106119795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,128,1,float16,float16,0,5.353381474812825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,128,1,float16,fp8,0,5.337231953938802
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,128,1,fp8,fp8,0,3.7022666931152344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,128,1,float16,fp8,0,5.173882802327474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,128,1,float16,float16,0,5.303743998209636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,128,1,float16,fp8,0,5.31710942586263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,128,1,fp8,fp8,0,3.7208213806152344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,128,1,fp8,fp8,0,3.7810134887695312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,128,1,float16,float16,0,5.474559783935547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,128,1,float16,fp8,0,5.24399471282959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,128,1,fp8,fp8,0,3.7178932825724282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,96,1,128,1,fp8,fp8,0,37.83484141031901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,96,2,128,1,fp8,fp8,0,36.96753184000651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,96,4,128,1,fp8,fp8,0,37.72326914469401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,128,1,float16,float16,0,27.14366404215495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,128,1,float16,fp8,0,26.85205841064453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,96,8,128,1,fp8,fp8,0,37.346048990885414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,128,1,fp8,fp8,0,18.489696502685547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,128,1,float16,float16,0,26.982192993164062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,96,128,1,float16,float16,0,27.68854522705078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,128,1,float16,fp8,0,26.94225565592448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,96,128,1,float16,fp8,0,27.89818064371745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,128,1,float16,float16,0,27.101582845052082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,128,1,float16,fp8,0,26.80821990966797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,128,1,fp8,fp8,0,18.713024139404297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,128,1,fp8,fp8,0,18.88917287190755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,128,1,float16,float16,0,27.207664489746094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,128,1,float16,fp8,0,27.5631841023763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,128,1,float16,float16,0,13.524363199869791
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,128,1,fp8,fp8,0,18.85280481974284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,128,1,float16,fp8,0,13.72656504313151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,128,1,fp8,fp8,0,9.353888193766275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,128,1,float16,float16,0,13.667439778645834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,128,1,float16,float16,0,13.695050557454428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,128,1,fp8,fp8,0,9.793173472086588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,128,1,float16,fp8,0,13.772064208984375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,128,1,float16,fp8,0,13.636720021565756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,128,1,fp8,fp8,0,9.593999862670898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,128,1,float16,float16,0,13.566000620524088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,128,1,float16,fp8,0,13.494560241699219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,128,1,fp8,fp8,0,9.269989649454752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,128,1,float16,float16,0,6.81060791015625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,128,1,float16,float16,0,13.652042388916016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,128,1,float16,fp8,0,13.718512217203775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,128,1,float16,fp8,0,6.772133509318034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,128,1,fp8,fp8,0,9.339802424112955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,128,1,float16,float16,0,6.948832194010417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,128,1,float16,fp8,0,7.013834635416667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,128,1,fp8,fp8,0,4.747050603230794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,128,1,float16,float16,0,6.8819624582926435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,128,1,fp8,fp8,0,4.721749305725098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,128,1,float16,fp8,0,6.705237070719401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,128,1,fp8,fp8,0,4.926597277323405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,128,1,float16,float16,0,6.821818669637044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,128,1,float16,fp8,0,6.9435469309488935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,128,1,fp8,fp8,0,4.864314715067546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,128,1,fp8,fp8,0,4.730607986450195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,128,1,float16,float16,0,3.4840161005655923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,128,1,float16,float16,0,6.9729868570963545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,128,1,float16,fp8,0,6.847024281819661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,128,1,float16,fp8,0,3.4785760243733725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,128,1,float16,float16,0,3.561999956766764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,128,1,float16,fp8,0,3.5153652826944985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,128,1,fp8,fp8,0,2.5579946835835776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,128,1,float16,float16,0,3.524928092956543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,128,1,float16,fp8,0,3.450261433919271
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,128,1,fp8,fp8,0,2.6268159548441568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,128,1,fp8,fp8,0,2.557055950164795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,128,1,float16,float16,0,3.556960105895996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,128,1,float16,fp8,0,3.5148213704427085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,128,1,fp8,fp8,0,2.566090742746989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,128,1,fp8,fp8,0,2.5665225982666016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,128,1,float16,fp8,0,3.4702399571736655
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,128,1,float16,float16,0,3.4846293131510415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,96,1,128,1,fp8,fp8,0,22.797439575195312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,96,2,128,1,fp8,fp8,0,22.30760955810547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,96,4,128,1,fp8,fp8,0,22.856597900390625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,128,1,float16,float16,0,16.129839579264324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,128,1,float16,fp8,0,15.790602366129557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,96,8,128,1,fp8,fp8,0,22.357930501302082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,128,1,float16,float16,0,16.28970718383789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,128,1,fp8,fp8,0,11.388863881429037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,128,1,float16,float16,0,15.926709493001303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,128,1,float16,fp8,0,15.739252726236979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,128,1,float16,fp8,0,16.1376215616862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,128,1,fp8,fp8,0,11.22329076131185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,128,1,float16,fp8,0,16.003376007080078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,128,1,fp8,fp8,0,11.84177017211914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,128,1,float16,float16,0,15.931439717610678
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,128,1,fp8,fp8,0,11.22769546508789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,128,1,float16,float16,0,15.921012878417969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,128,1,float16,float16,0,7.966026941935222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,128,1,float16,fp8,0,7.912218729654948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,128,1,float16,fp8,0,15.767834981282553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,128,1,fp8,fp8,0,11.185882568359375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,128,1,fp8,fp8,0,5.567402521769206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,128,1,float16,float16,0,7.954720179239909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,128,1,float16,fp8,0,7.856581370035808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,128,1,fp8,fp8,0,5.5633494059244795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,128,1,float16,fp8,0,8.35263442993164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,128,1,float16,fp8,0,7.9723466237386065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,128,1,float16,float16,0,8.002960205078125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,128,1,fp8,fp8,0,5.9974721272786455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,128,1,float16,float16,0,8.353738784790039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,128,1,fp8,fp8,0,5.598901112874349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,128,1,float16,float16,0,8.065125147501627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,128,1,float16,fp8,0,7.899109522501628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,128,1,float16,float16,0,3.9873758951822915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,128,1,fp8,fp8,0,5.625146865844727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,128,1,float16,fp8,0,3.8818718592325845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,128,1,fp8,fp8,0,2.931023915608724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,128,1,float16,fp8,0,3.9310134251912436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,128,1,float16,float16,0,3.9921013514200845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,128,1,float16,float16,0,4.084410667419434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,128,1,fp8,fp8,0,2.9330771764119468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,128,1,fp8,fp8,0,3.0576213200887046
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,128,1,float16,float16,0,4.060970624287923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,128,1,float16,fp8,0,3.99452273050944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,128,1,float16,fp8,0,3.8826611836751304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,128,1,fp8,fp8,0,2.9380693435668945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,128,1,float16,float16,0,3.9368321100870767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,128,1,float16,fp8,0,3.981893221537272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,128,1,fp8,fp8,0,2.9528748194376626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,128,1,float16,float16,0,2.1253226598103843
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,128,1,float16,fp8,0,2.110960006713867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,128,1,fp8,fp8,0,1.6157280604044597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,128,1,float16,fp8,0,2.1206080118815103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,128,1,float16,float16,0,2.144890626271566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,128,1,fp8,fp8,0,1.6755733489990234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,128,1,float16,fp8,0,2.1118133862813315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,128,1,fp8,fp8,0,1.617136001586914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,128,1,float16,float16,0,2.1296745936075845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,128,1,float16,float16,0,2.151264031728109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,128,1,float16,fp8,0,2.091872056325277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,128,1,fp8,fp8,0,1.6205760637919109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,128,1,float16,fp8,0,2.10750404993693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,128,1,fp8,fp8,0,1.6262186368306477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,128,1,float16,float16,0,2.1358826955159507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,96,1,128,1,fp8,fp8,0,22.96095021565755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,96,2,128,1,fp8,fp8,0,22.575551350911457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,96,4,128,1,fp8,fp8,0,22.76245880126953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,128,1,float16,float16,0,15.42312494913737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,128,1,float16,fp8,0,15.270549774169922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,128,1,fp8,fp8,0,11.275792439778646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,96,8,128,1,fp8,fp8,0,22.83373769124349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,128,1,float16,float16,0,15.550954182942709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,96,128,1,float16,float16,0,16.002532958984375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,128,1,fp8,fp8,0,11.140154520670572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,96,128,1,float16,fp8,0,15.87396240234375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,128,1,float16,fp8,0,15.176804860432943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,128,1,float16,float16,0,15.647642771402994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,128,1,float16,fp8,0,15.40488052368164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,128,1,fp8,fp8,0,11.28164291381836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,128,1,float16,float16,0,15.442843119303385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,128,1,float16,fp8,0,15.254384358723959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,128,1,fp8,fp8,0,11.267300923665365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,128,1,float16,float16,0,8.232965469360352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,128,1,float16,float16,0,7.78879992167155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,128,1,float16,fp8,0,7.600869496663411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,128,1,fp8,fp8,0,5.586890538533528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,128,1,fp8,fp8,0,5.9904429117838545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,128,1,float16,float16,0,7.4947465260823565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,128,1,fp8,fp8,0,5.604570388793945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,128,1,float16,fp8,0,7.6267038981119795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,128,1,float16,float16,0,7.644784291585286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,128,1,float16,fp8,0,8.29793612162272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,128,1,fp8,fp8,0,5.60423469543457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,128,1,float16,fp8,0,7.507146835327148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,128,1,float16,float16,0,7.67250124613444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,128,1,fp8,fp8,0,5.6266295115153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,128,1,float16,float16,0,3.7784694035847983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,128,1,float16,fp8,0,7.5699412027994795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,128,1,float16,fp8,0,3.7139838536580405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,128,1,float16,fp8,0,3.915424029032389
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,128,1,fp8,fp8,0,2.8937066396077475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,128,1,float16,float16,0,3.9899733861287436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,128,1,float16,fp8,0,3.763530731201172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,128,1,fp8,fp8,0,2.9034506479899087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,128,1,float16,float16,0,3.763312021891276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,128,1,float16,float16,0,3.774143854777018
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,128,1,fp8,fp8,0,3.116389274597168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,128,1,float16,fp8,0,3.6972373326619468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,128,1,fp8,fp8,0,2.9028107325236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,128,1,float16,float16,0,3.811237335205078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,128,1,fp8,fp8,0,2.913546562194824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,128,1,float16,fp8,0,3.7122507095336914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,128,1,float16,float16,0,1.9841866493225098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,128,1,float16,fp8,0,1.9497973124186199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,128,1,fp8,fp8,0,1.5531946818033855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,128,1,float16,fp8,0,2.0125706990559897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,128,1,float16,float16,0,2.0307626724243164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,128,1,float16,float16,0,1.997221310933431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,128,1,fp8,fp8,0,1.6504799524943035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,128,1,float16,fp8,0,1.949285348256429
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,128,1,fp8,fp8,0,1.5490293502807617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,128,1,fp8,fp8,0,1.553317387898763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,128,1,float16,fp8,0,1.9657333691914876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,128,1,float16,float16,0,1.994917392730713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,128,1,float16,float16,0,1.9941867192586262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,128,1,float16,fp8,0,1.9629759788513184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,128,1,float16,fp8,0,1.0824000040690105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,128,1,fp8,fp8,0,1.5622933705647786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,128,1,float16,float16,0,1.109498659769694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,128,1,fp8,fp8,0,0.8828746477762858
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,128,1,float16,float16,0,1.1139946778615315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,128,1,float16,float16,0,1.109007994333903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,128,1,float16,fp8,0,1.0979519685109456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,128,1,float16,fp8,0,1.0863146781921387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,128,1,fp8,fp8,0,0.8835253715515137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,128,1,float16,float16,0,1.1116586526234944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,128,1,float16,fp8,0,1.0850133101145427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,128,1,fp8,fp8,0,0.9256426493326823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,128,1,fp8,fp8,0,0.8833866914113363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,128,1,float16,float16,0,1.1093599796295166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,128,1,float16,fp8,0,1.0830026467641194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,128,1,fp8,fp8,0,0.8896106878916422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,96,1,128,1,fp8,fp8,0,14.180304209391275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,96,2,128,1,fp8,fp8,0,14.098555246988932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,96,4,128,1,fp8,fp8,0,14.108015696207682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,128,1,float16,float16,0,9.339775721232096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,128,1,float16,fp8,0,9.233919779459635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,128,1,fp8,fp8,0,7.056912104288737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,128,1,float16,float16,0,9.346736272176107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,96,8,128,1,fp8,fp8,0,14.193594614664713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,128,1,float16,float16,0,9.927866617838541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,128,1,float16,fp8,0,9.864197413126627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,128,1,float16,fp8,0,9.30848503112793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,128,1,fp8,fp8,0,7.05349858601888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,128,1,fp8,fp8,0,7.876448313395183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,128,1,float16,float16,0,9.231274922688803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,128,1,float16,fp8,0,9.190869649251303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,128,1,fp8,fp8,0,7.091552098592122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,128,1,float16,float16,0,9.397626876831055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,128,1,float16,fp8,0,9.221743901570639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,128,1,fp8,fp8,0,7.110005060831706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,128,1,float16,float16,0,4.5495147705078125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,128,1,float16,fp8,0,4.471536000569661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,128,1,fp8,fp8,0,3.5763041178385415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,128,1,float16,fp8,0,4.506613413492839
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,128,1,float16,float16,0,4.805189450581868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,128,1,float16,float16,0,4.597690582275391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,128,1,float16,fp8,0,4.827055931091309
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,128,1,fp8,fp8,0,3.5885547002156577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,128,1,fp8,fp8,0,3.890752156575521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,128,1,float16,float16,0,4.622762680053711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,128,1,float16,fp8,0,4.451231956481934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,128,1,fp8,fp8,0,3.5915145874023438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,128,1,fp8,fp8,0,3.602191925048828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,128,1,float16,fp8,0,4.601471900939941
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,128,1,float16,float16,0,4.649855931599935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,128,1,float16,float16,0,2.341887950897217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,128,1,float16,fp8,0,2.2955946922302246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,128,1,fp8,fp8,0,1.8672746022542317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,128,1,float16,fp8,0,2.314746697743734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,128,1,float16,float16,0,2.3504640261332193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,128,1,float16,float16,0,2.425877412160238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,128,1,float16,fp8,0,2.399829387664795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,128,1,fp8,fp8,0,2.043989340464274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,128,1,fp8,fp8,0,1.8719520568847656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,128,1,float16,float16,0,2.345717271169027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,128,1,float16,fp8,0,2.2968266805013022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,128,1,fp8,fp8,0,1.878010590871175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,128,1,float16,float16,0,2.357936064402262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,128,1,float16,fp8,0,2.3146185874938965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,128,1,fp8,fp8,0,1.8811146418253581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,128,1,float16,float16,0,1.2536053657531738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,128,1,float16,fp8,0,1.2310293515523274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,128,1,fp8,fp8,0,1.0151306788126628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,128,1,float16,float16,0,1.304085334142049
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,128,1,fp8,fp8,0,1.0916213194529216
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,128,1,float16,fp8,0,1.2769707043965657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,128,1,float16,float16,0,1.2578879992167156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,128,1,float16,fp8,0,1.2265866597493489
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,128,1,float16,fp8,0,1.2296106815338135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,128,1,fp8,fp8,0,1.0160266558329265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,128,1,fp8,fp8,0,1.0189706484476726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,128,1,float16,float16,0,1.2630933125813801
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,128,1,float16,float16,0,1.2652426560719807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,128,1,float16,fp8,0,1.2352266311645508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,128,1,fp8,fp8,0,1.0212960243225098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,128,1,float16,float16,0,0.7160373528798422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,128,1,float16,float16,0,0.7212746938069662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,128,1,float16,fp8,0,0.6972320079803467
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,128,1,fp8,fp8,0,0.5910666783650717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,128,1,fp8,fp8,0,0.6231093406677246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,128,1,float16,fp8,0,0.7156639893849691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,128,1,float16,float16,0,0.7181866963704427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,128,1,fp8,fp8,0,0.5906986792882284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,128,1,float16,fp8,0,0.697978655497233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,128,1,fp8,fp8,0,0.5927306811014811
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,128,1,float16,float16,0,0.7144693533579508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,128,1,float16,fp8,0,0.6998399893442789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,128,1,float16,float16,0,0.716101328531901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,128,1,float16,fp8,0,0.6998773415883383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,128,1,fp8,fp8,0,0.5957599878311157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,96,1,128,1,fp8,fp8,0,15.419845581054688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,96,2,128,1,fp8,fp8,0,15.43935521443685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,96,4,128,1,fp8,fp8,0,15.464122772216797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,128,1,float16,fp8,0,9.415199915568033
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,128,1,float16,float16,0,9.572767893473307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,128,1,fp8,fp8,0,7.698586781819661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,128,1,float16,float16,0,9.561418533325195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,96,8,128,1,fp8,fp8,0,15.530687967936197
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,96,128,1,float16,float16,0,10.561301549275717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,96,128,1,float16,fp8,0,10.097808202107748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,128,1,float16,fp8,0,9.351018905639648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,128,1,fp8,fp8,0,7.709717432657878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,128,1,float16,fp8,0,9.51047452290853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,128,1,float16,float16,0,9.607274373372396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,128,1,fp8,fp8,0,7.73192024230957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,128,1,float16,float16,0,9.682005564371744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,128,1,fp8,fp8,0,7.7736053466796875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,128,1,float16,fp8,0,9.517301559448242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,128,1,float16,float16,0,5.0539201100667315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,128,1,float16,float16,0,4.674624125162761
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,128,1,float16,fp8,0,4.560256004333496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,128,1,fp8,fp8,0,3.8520854314168296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,128,1,float16,fp8,0,4.89628791809082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,128,1,float16,float16,0,4.660160064697266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,128,1,float16,fp8,0,4.5558773676554365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,128,1,fp8,fp8,0,4.285429318745931
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,128,1,float16,float16,0,4.648954709370931
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,128,1,fp8,fp8,0,3.85698668162028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,128,1,float16,fp8,0,4.6272532145182295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,128,1,fp8,fp8,0,3.8642613093058267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,128,1,float16,fp8,0,4.5617014567057295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,128,1,fp8,fp8,0,3.894591967264811
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,128,1,float16,float16,0,4.687173207600911
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,128,1,float16,float16,0,2.3789706230163574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,128,1,float16,fp8,0,2.3179574012756348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,128,1,float16,float16,0,2.388058662414551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,128,1,float16,float16,0,2.5614986419677734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,128,1,fp8,fp8,0,1.9798773129781086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,128,1,float16,fp8,0,2.322085380554199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,128,1,float16,fp8,0,2.4764480590820312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,128,1,fp8,fp8,0,1.9845973650614421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,128,1,float16,float16,0,2.381338596343994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,128,1,float16,fp8,0,2.3342080116271973
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,128,1,fp8,fp8,0,2.2007519404093423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,128,1,fp8,fp8,0,1.995946725209554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,128,1,float16,float16,0,2.3915252685546875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,128,1,float16,fp8,0,2.335007985432943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,128,1,fp8,fp8,0,1.9972480138142903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,128,1,float16,fp8,0,1.2191306749979656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,128,1,float16,float16,0,1.2463093598683674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,128,1,fp8,fp8,0,1.046458641688029
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,128,1,float16,float16,0,1.3058026631673176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,128,1,float16,float16,0,1.2505813439687092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,128,1,float16,fp8,0,1.2981279691060383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,128,1,float16,fp8,0,1.219882647196452
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,128,1,fp8,fp8,0,1.1679360071818035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,128,1,fp8,fp8,0,1.0531573295593262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,128,1,float16,float16,0,1.250810702641805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,128,1,fp8,fp8,0,1.051690657933553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,128,1,float16,fp8,0,1.2191946506500244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,128,1,float16,float16,0,1.2538560231526692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,128,1,float16,fp8,0,1.224010705947876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,128,1,float16,float16,0,0.6851627031962076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,128,1,float16,fp8,0,0.6627200047175089
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,128,1,fp8,fp8,0,1.0570560296376545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,128,1,fp8,fp8,0,0.5809599955876669
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,128,1,float16,float16,0,0.7103412946065267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,128,1,float16,fp8,0,0.7024532953898112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,128,1,float16,float16,0,0.6849173704783121
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,128,1,float16,fp8,0,0.6655253171920776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,128,1,float16,fp8,0,0.6670666535695394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,128,1,fp8,fp8,0,0.5801493326822916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,128,1,fp8,fp8,0,0.5831466515858968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,128,1,float16,float16,0,0.6854133605957031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,128,1,fp8,fp8,0,0.6334240039189657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,128,1,float16,float16,0,0.6882452964782715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,128,1,float16,fp8,0,0.6679519812266032
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,128,1,fp8,fp8,0,0.5876533190409342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,128,1,float16,fp8,0,0.38967466354370117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,128,1,float16,float16,0,0.4129279851913452
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,128,1,float16,float16,0,0.4016266663869222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,128,1,fp8,fp8,0,0.36744534969329834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,128,1,fp8,fp8,0,0.336575984954834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,128,1,float16,float16,0,0.40187732378641766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,128,1,float16,fp8,0,0.4040373166402181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,128,1,float16,fp8,0,0.39164264996846515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,128,1,fp8,fp8,0,0.3348960081736247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,128,1,float16,float16,0,0.4038613239924113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,128,1,fp8,fp8,0,0.33638401826222736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,128,1,float16,float16,0,0.4063520034154256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,128,1,float16,fp8,0,0.390341321627299
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,128,1,float16,fp8,0,0.39235198497772217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,128,1,fp8,fp8,0,0.3365600109100342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,96,1,128,1,fp8,fp8,0,10.301631927490234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,96,2,128,1,fp8,fp8,0,10.312608083089193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,96,4,128,1,fp8,fp8,0,10.341610590616861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,128,1,float16,float16,0,5.950960159301758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,128,1,float16,fp8,0,5.868480046590169
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,96,8,128,1,fp8,fp8,0,10.398031870524088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,128,1,fp8,fp8,0,5.123269399007161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,128,1,float16,float16,0,6.673418680826823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,128,1,float16,float16,0,5.956991831461589
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,128,1,float16,fp8,0,6.534165064493815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,128,1,fp8,fp8,0,5.746208190917969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,128,1,float16,fp8,0,6.019754409790039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,128,1,fp8,fp8,0,5.126511891682942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,128,1,float16,float16,0,5.99234135945638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,128,1,fp8,fp8,0,5.152463912963867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,128,1,float16,fp8,0,5.931221644083659
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,128,1,float16,float16,0,3.0028905868530273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,128,1,float16,float16,0,6.025637308756511
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,128,1,fp8,fp8,0,5.190789222717285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,128,1,float16,fp8,0,5.8549760182698565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,128,1,fp8,fp8,0,2.5810240109761557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,128,1,float16,fp8,0,2.924015998840332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,128,1,float16,float16,0,3.006629308064779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,128,1,float16,fp8,0,3.1946452458699546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,128,1,float16,float16,0,3.209312121073405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,128,1,float16,fp8,0,2.927877426147461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,128,1,fp8,fp8,0,2.9155146280924478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,128,1,fp8,fp8,0,2.5840214093526206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,128,1,float16,fp8,0,2.932703971862793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,128,1,fp8,fp8,0,2.5968000094095864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,128,1,float16,float16,0,3.0079520543416343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,128,1,float16,float16,0,3.018272082010905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,128,1,float16,fp8,0,2.937749226888021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,128,1,fp8,fp8,0,2.615957260131836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,128,1,float16,float16,0,1.5507465998331706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,128,1,float16,float16,0,1.6513066291809082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,128,1,fp8,fp8,0,1.3390186627705891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,128,1,float16,float16,0,1.5493706067403157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,128,1,float16,fp8,0,1.5072746276855469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,128,1,float16,fp8,0,1.6288053194681804
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,128,1,float16,fp8,0,1.5100639661153157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,128,1,fp8,fp8,0,1.3434453010559082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,128,1,fp8,fp8,0,1.509866714477539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,128,1,float16,float16,0,1.5522346496582031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,128,1,float16,fp8,0,1.5102720260620117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,128,1,fp8,fp8,0,1.3427039782206218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,128,1,fp8,fp8,0,1.3561174074808757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,128,1,float16,float16,0,1.5591039657592773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,128,1,float16,fp8,0,1.5144480069478352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,128,1,float16,float16,0,0.8213760058085123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,128,1,float16,fp8,0,0.7959893544514974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,128,1,float16,float16,0,0.8698133627573649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,128,1,fp8,fp8,0,0.717141310373942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,128,1,float16,float16,0,0.8209706942240397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,128,1,float16,fp8,0,0.7989760239919027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,128,1,float16,fp8,0,0.862335999806722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,128,1,fp8,fp8,0,0.8056480089823405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,128,1,fp8,fp8,0,0.7179360389709473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,128,1,float16,float16,0,0.8218186696370443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,128,1,float16,fp8,0,0.802079995473226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,128,1,fp8,fp8,0,0.7213173707326254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,128,1,float16,fp8,0,0.801093339920044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,128,1,float16,float16,0,0.8250613212585449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,128,1,float16,float16,0,0.4586346546808879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,128,1,float16,fp8,0,0.444048007329305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,128,1,fp8,fp8,0,0.721664031346639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,128,1,fp8,fp8,0,0.4021386702855428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,128,1,float16,float16,0,0.47729067007700604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,128,1,float16,fp8,0,0.4714826742808024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,128,1,fp8,fp8,0,0.442138671875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,128,1,float16,float16,0,0.45929598808288574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,128,1,float16,fp8,0,0.44460801283518475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,128,1,fp8,fp8,0,0.4052639802296956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,128,1,float16,float16,0,0.4586346546808879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,128,1,float16,fp8,0,0.4431146780649821
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,128,1,fp8,fp8,0,0.4033279816309611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,128,1,float16,float16,0,0.4597546656926473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,128,1,float16,fp8,0,0.4456160068511963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,128,1,fp8,fp8,0,0.40556267897288006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,128,1,float16,float16,0,0.2853920062383016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,128,1,float16,fp8,0,0.28069865703582764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,128,1,fp8,fp8,0,0.2585493326187134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,128,1,float16,fp8,0,0.26680533091227215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,128,1,float16,float16,0,0.27476799488067627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,128,1,fp8,fp8,0,0.2366186579068502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,128,1,float16,float16,0,0.2750506599744161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,128,1,float16,fp8,0,0.2667359908421834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,128,1,fp8,fp8,0,0.23695999383926392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,128,1,float16,float16,0,0.27738134066263836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,128,1,float16,fp8,0,0.2670186758041382
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,128,1,fp8,fp8,0,0.23712533712387085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,128,1,float16,float16,0,0.27720532814661664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,128,1,fp8,fp8,0,0.2387626568476359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,128,1,float16,fp8,0,0.26863465706507367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,96,1,128,1,fp8,fp8,0,10.213706970214844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,96,2,128,1,fp8,fp8,0,10.252010981241861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,96,4,128,1,fp8,fp8,0,10.39353052775065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,128,1,float16,fp8,0,5.747578938802083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,128,1,float16,float16,0,5.884127934773763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,96,8,128,1,fp8,fp8,0,10.830827077229818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,128,1,fp8,fp8,0,5.100031852722168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,128,1,float16,float16,0,5.888512293497722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,128,1,float16,fp8,0,5.772623697916667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,96,128,1,float16,fp8,0,6.895439783732097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,96,128,1,float16,float16,0,6.888959884643555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,128,1,fp8,fp8,0,5.116799990336101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,128,1,fp8,fp8,0,5.189189275105794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,128,1,float16,float16,0,5.842986424763997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,128,1,float16,fp8,0,5.8591359456380205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,128,1,float16,float16,0,5.997578938802083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,128,1,float16,fp8,0,6.0251413981119795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,128,1,fp8,fp8,0,5.4185333251953125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,128,1,float16,float16,0,2.859759966532389
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,128,1,float16,float16,0,3.421349207560221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,128,1,float16,fp8,0,2.853573481241862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,128,1,float16,fp8,0,3.2954079310099282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,128,1,fp8,fp8,0,2.946810722351074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,128,1,fp8,fp8,0,2.5371519724527993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,128,1,float16,float16,0,2.873562812805176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,128,1,fp8,fp8,0,2.5793654123942056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,128,1,float16,float16,0,2.902736028035482
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,128,1,float16,fp8,0,2.87497615814209
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,128,1,float16,fp8,0,2.8906399408976235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,128,1,fp8,fp8,0,2.605583985646566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,128,1,float16,float16,0,2.9716161092122397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,128,1,float16,fp8,0,2.957029342651367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,128,1,float16,float16,0,1.4453600247701008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,128,1,fp8,fp8,0,2.7207358678181968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,128,1,float16,fp8,0,1.4393173853556316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,128,1,float16,float16,0,1.4534986813863118
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,128,1,fp8,fp8,0,1.2668373584747314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,128,1,float16,float16,0,1.6951840718587239
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,128,1,fp8,fp8,0,1.2926186720530193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,128,1,float16,fp8,0,1.4586025873819988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,128,1,fp8,fp8,0,1.469007968902588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,128,1,float16,fp8,0,1.6589120229085286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,128,1,float16,fp8,0,1.46015469233195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,128,1,fp8,fp8,0,1.3114773432413738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,128,1,float16,float16,0,1.4516266187032063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,128,1,float16,fp8,0,1.4785386721293132
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,128,1,float16,float16,0,1.4812960624694824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,128,1,fp8,fp8,0,1.3641120592753093
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,128,1,float16,float16,0,0.7387786706288656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,128,1,float16,fp8,0,0.7356533209482828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,128,1,fp8,fp8,0,0.6482186714808146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,128,1,float16,float16,0,0.7426239649454752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,128,1,float16,fp8,0,0.7437706788380941
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,128,1,float16,float16,0,0.850810686747233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,128,1,float16,fp8,0,0.8328693707784017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,128,1,fp8,fp8,0,0.7496693134307861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,128,1,fp8,fp8,0,0.6584853331247965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,128,1,float16,fp8,0,0.7469013532002767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,128,1,float16,float16,0,0.7419679959615072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,128,1,fp8,fp8,0,0.6616213321685791
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,128,1,float16,float16,0,0.7486293315887451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,128,1,float16,fp8,0,0.7476586500803629
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,128,1,fp8,fp8,0,0.6905279954274496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,128,1,float16,float16,0,0.4433279832204183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,128,1,float16,fp8,0,0.3858933448791504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,128,1,float16,float16,0,0.38335466384887695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,128,1,fp8,fp8,0,0.33606934547424316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,128,1,fp8,fp8,0,0.38599467277526855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,128,1,float16,fp8,0,0.4318079948425293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,128,1,float16,float16,0,0.38465599219004315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,128,1,float16,fp8,0,0.38682134946187335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,128,1,fp8,fp8,0,0.34145065148671466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,128,1,float16,float16,0,0.3882346550623576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,128,1,float16,fp8,0,0.3863573471705119
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,128,1,fp8,fp8,0,0.342522660891215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,128,1,float16,float16,0,0.3906293312708537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,128,1,float16,fp8,0,0.39137065410614014
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,128,1,fp8,fp8,0,0.35470934708913165
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,128,1,fp8,fp8,0,0.1743626594543457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,128,1,float16,fp8,0,0.23077332973480225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,128,1,float16,fp8,0,0.20735466480255127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,128,1,float16,float16,0,0.20510933796564737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,128,1,float16,float16,0,0.23644800980885824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,128,1,float16,float16,0,0.2068586746851603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,128,1,fp8,fp8,0,0.20265599091847739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,128,1,float16,fp8,0,0.2071626583735148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,128,1,float16,float16,0,0.20785067478815714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,128,1,fp8,fp8,0,0.17567465702692667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,128,1,float16,fp8,0,0.20869332551956177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,128,1,float16,float16,0,0.20962133010228476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,128,1,fp8,fp8,0,0.1755626598993937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,128,1,float16,fp8,0,0.2100586692492167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,128,1,fp8,fp8,0,0.1807360053062439
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,128,1,float16,float16,0,0.1307360033194224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,128,1,float16,float16,0,0.1104213297367096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,128,1,float16,fp8,0,0.1297920048236847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,128,1,fp8,fp8,0,0.11072533329327901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,128,1,float16,fp8,0,0.1104746659596761
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,128,1,float16,float16,0,0.11037866274515788
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,128,1,float16,fp8,0,0.11058132847150166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,128,1,fp8,fp8,0,0.0950986643632253
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,128,1,fp8,fp8,0,0.09730666875839233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,128,1,float16,fp8,0,0.11186666289965312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,128,1,float16,float16,0,0.11017066240310669
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,128,1,float16,float16,0,0.11143466830253601
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,128,1,fp8,fp8,0,0.09798933068911235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,128,1,fp8,fp8,0,0.0995786686738332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,128,1,float16,fp8,0,0.11071999867757161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,96,1,128,1,fp8,fp8,0,8.48194694519043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,96,2,128,1,fp8,fp8,0,8.5415891011556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,96,4,128,1,fp8,fp8,0,8.635754903157553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,1,128,1,float16,float16,0,4.504677454630534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,1,128,1,float16,fp8,0,4.496335983276367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,96,8,128,1,fp8,fp8,0,9.04153060913086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,1,128,1,fp8,fp8,0,4.2295786539713545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,2,128,1,float16,float16,0,4.511397361755371
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,2,128,1,float16,fp8,0,4.511280059814453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,96,128,1,float16,float16,0,5.600639979044597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,2,128,1,fp8,fp8,0,4.2264054616292315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,96,128,1,float16,fp8,0,5.362970352172852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,4,128,1,fp8,fp8,0,4.267205238342285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,4,128,1,float16,fp8,0,4.531226793924968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,4,128,1,float16,float16,0,4.573274612426758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,8,128,1,float16,fp8,0,4.6565812428792315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,8,128,1,float16,float16,0,4.664901415506999
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,96,8,128,1,fp8,fp8,0,4.528122584025065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,1,128,1,float16,float16,0,2.2197866439819336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,1,128,1,float16,fp8,0,2.2150826454162598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,96,128,1,float16,float16,0,2.7353013356526694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,2,128,1,float16,float16,0,2.232975959777832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,96,128,1,float16,fp8,0,2.6493333180745444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,1,128,1,fp8,fp8,0,2.0957493782043457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,96,128,1,fp8,fp8,0,2.5124425888061523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,2,128,1,fp8,fp8,0,2.1120959917704263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,2,128,1,float16,fp8,0,2.23581329981486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,4,128,1,float16,float16,0,2.2623093922932944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,4,128,1,float16,fp8,0,2.2591253916422525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,4,128,1,fp8,fp8,0,2.144341309865316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,8,128,1,float16,float16,0,2.328277269999186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,8,128,1,float16,fp8,0,2.311349391937256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,1,128,1,float16,float16,0,1.1208693186442058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,96,8,128,1,fp8,fp8,0,2.2432212829589844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,1,128,1,float16,fp8,0,1.1239466667175293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,2,128,1,float16,float16,0,1.1311253706614177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,96,128,1,float16,fp8,0,1.3317440350850422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,2,128,1,float16,fp8,0,1.1286293665568035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,96,128,1,float16,float16,0,1.3786293665568035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,2,128,1,fp8,fp8,0,1.0586880048116047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,1,128,1,fp8,fp8,0,1.0426719983418782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,96,128,1,fp8,fp8,0,1.2399146556854248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,4,128,1,float16,float16,0,1.133680025736491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,4,128,1,float16,fp8,0,1.1388479868570964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,4,128,1,fp8,fp8,0,1.0814719994862874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,8,128,1,float16,float16,0,1.1623893578847249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,8,128,1,fp8,fp8,0,1.1333119869232178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,96,8,128,1,float16,fp8,0,1.1483413378397624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,1,128,1,float16,float16,0,0.5719519853591919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,1,128,1,float16,fp8,0,0.5723040103912354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,1,128,1,fp8,fp8,0,0.5291733344395956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,2,128,1,float16,float16,0,0.5733919938405355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,96,128,1,float16,fp8,0,0.6732532978057861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,96,128,1,float16,float16,0,0.691093365351359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,2,128,1,float16,fp8,0,0.5794399976730347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,96,128,1,fp8,fp8,0,0.6308853228886923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,2,128,1,fp8,fp8,0,0.5451840162277222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,4,128,1,float16,float16,0,0.5801386833190918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,4,128,1,float16,fp8,0,0.5821653207143148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,4,128,1,fp8,fp8,0,0.5431520144144694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,8,128,1,float16,float16,0,0.5848693450291952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,8,128,1,float16,fp8,0,0.5852320194244385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,1,128,1,float16,fp8,0,0.29863999287287396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,1,128,1,float16,float16,0,0.29759466648101807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,96,128,1,float16,float16,0,0.35949865976969403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,96,8,128,1,fp8,fp8,0,0.5759573380152384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,1,128,1,fp8,fp8,0,0.27581334114074707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,2,128,1,float16,float16,0,0.2999946673711141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,96,128,1,fp8,fp8,0,0.3272053400675456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,96,128,1,float16,fp8,0,0.35042134920756024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,2,128,1,float16,fp8,0,0.3001280029614766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,2,128,1,fp8,fp8,0,0.2829493284225464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,4,128,1,float16,float16,0,0.3006666700045268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,4,128,1,fp8,fp8,0,0.28355199098587036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,8,128,1,float16,float16,0,0.30404265721638996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,4,128,1,float16,fp8,0,0.30106133222579956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,8,128,1,float16,fp8,0,0.3059840003649394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,1,128,1,float16,float16,0,0.1606880029042562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,1,128,1,float16,fp8,0,0.1606613298257192
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,96,8,128,1,fp8,fp8,0,0.2939786712328593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,96,128,1,float16,fp8,0,0.18668266137441
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,96,128,1,float16,float16,0,0.19220799207687378
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,96,128,1,fp8,fp8,0,0.17245332400004068
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,2,128,1,float16,float16,0,0.1622773309548696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,1,128,1,fp8,fp8,0,0.14882133404413858
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,2,128,1,float16,fp8,0,0.1620266636212667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,2,128,1,fp8,fp8,0,0.15005333224932352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,4,128,1,float16,fp8,0,0.1625493367513021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,4,128,1,fp8,fp8,0,0.1508746643861135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,8,128,1,float16,float16,0,0.16382933656374613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,4,128,1,float16,float16,0,0.1623199979464213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,8,128,1,float16,fp8,0,0.1644266645113627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,1,128,1,float16,float16,0,0.08910399675369263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,96,128,1,float16,float16,0,0.1125866671403249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,96,8,128,1,fp8,fp8,0,0.15412799517313638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,1,128,1,float16,fp8,0,0.08927466471989949
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,96,128,1,float16,fp8,0,0.10966400305430095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,96,128,1,fp8,fp8,0,0.0956106682618459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,2,128,1,float16,float16,0,0.08938133716583252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,1,128,1,fp8,fp8,0,0.08295999964078267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,2,128,1,fp8,fp8,0,0.08266133566697438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,2,128,1,float16,fp8,0,0.08865066369374593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,4,128,1,float16,fp8,0,0.08973866701126099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,8,128,1,float16,float16,0,0.08957333366076152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,4,128,1,fp8,fp8,0,0.08408000071843465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,4,128,1,float16,float16,0,0.08964799841245015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,8,128,1,float16,fp8,0,0.09125866492589314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,96,8,128,1,fp8,fp8,0,0.08644800384839375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,1,128,1,float16,float16,0,0.05158400038878123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,96,128,1,float16,float16,0,0.061754668752352394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,96,128,1,float16,fp8,0,0.05985066791375478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,1,128,1,float16,fp8,0,0.05156266689300537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,96,128,1,fp8,fp8,0,0.05857066810131073
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,2,128,1,float16,float16,0,0.051685333251953125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,2,128,1,float16,fp8,0,0.05161599814891815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,1,128,1,fp8,fp8,0,0.04849599798520406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,2,128,1,fp8,fp8,0,0.048207998275756836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,4,128,1,float16,fp8,0,0.05166399975617727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,4,128,1,fp8,fp8,0,0.04900800188382467
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,4,128,1,float16,float16,0,0.05160533388455709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,8,128,1,float16,float16,0,0.052426666021347046
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,8,128,1,float16,fp8,0,0.05213333169619242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,96,8,128,1,fp8,fp8,0,0.04946133494377136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,1,128,1,fp8,fp8,0,3.660698572794596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,1,128,1,float16,fp8,0,3.8467893600463867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,2,128,1,float16,fp8,0,3.843690554300944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,4,128,1,float16,fp8,0,3.8853225708007812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,2,128,1,float16,float16,0,3.866959889729818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,4,128,1,float16,float16,0,3.9141387939453125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,1,128,1,float16,float16,0,3.8611253102620444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,2,128,1,fp8,fp8,0,3.674480120340983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,1,128,1,float16,float16,0,1.9063679377237956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,4,128,1,fp8,fp8,0,3.7313706080118814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,8,128,1,float16,fp8,0,3.9664427439371743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,8,128,1,float16,float16,0,4.016448020935059
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,96,8,128,1,fp8,fp8,0,3.944682757059733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,1,128,1,float16,fp8,0,1.8941814104715984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,1,128,1,fp8,fp8,0,1.8156959215799968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,96,128,1,float16,fp8,0,2.3471412658691406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,96,128,1,float16,float16,0,2.4216747283935547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,2,128,1,float16,float16,0,1.8995787302652996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,2,128,1,float16,fp8,0,1.9080959955851238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,2,128,1,fp8,fp8,0,1.8536319732666016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,4,128,1,float16,float16,0,1.9408639272054036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,4,128,1,float16,fp8,0,1.9224586486816406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,96,128,1,fp8,fp8,0,2.254037380218506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,4,128,1,fp8,fp8,0,1.8810186386108398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,8,128,1,float16,float16,0,1.9981226921081543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,8,128,1,float16,fp8,0,1.985498587290446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,96,8,128,1,fp8,fp8,0,1.978432019551595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,1,128,1,float16,fp8,0,0.9570079644521078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,1,128,1,float16,float16,0,0.9553706645965576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,1,128,1,fp8,fp8,0,0.9045120080312093
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,2,128,1,float16,float16,0,0.9672266642252604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,2,128,1,float16,fp8,0,0.9678933620452881
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,96,128,1,float16,float16,0,1.2154560089111328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,2,128,1,fp8,fp8,0,0.9192907015482584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,96,128,1,fp8,fp8,0,1.1204053560892742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,96,128,1,float16,fp8,0,1.1701227029164631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,4,128,1,float16,fp8,0,0.9666933218638102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,4,128,1,float16,float16,0,0.9673333168029785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,4,128,1,fp8,fp8,0,0.934821367263794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,8,128,1,float16,float16,0,0.994154691696167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,1,128,1,float16,fp8,0,0.4867466688156128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,8,128,1,float16,fp8,0,0.9883999824523926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,1,128,1,float16,float16,0,0.4885546763737996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,1,128,1,fp8,fp8,0,0.45878398418426514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,96,8,128,1,fp8,fp8,0,0.9994613329569498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,96,128,1,float16,float16,0,0.6131893396377563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,2,128,1,float16,float16,0,0.4914720058441162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,2,128,1,float16,fp8,0,0.49262932936350506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,4,128,1,float16,float16,0,0.49241065979003906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,2,128,1,fp8,fp8,0,0.47092266877492267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,4,128,1,float16,fp8,0,0.49506131807963055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,96,128,1,fp8,fp8,0,0.5691839853922526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,96,128,1,float16,fp8,0,0.5916159947713217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,4,128,1,fp8,fp8,0,0.4740266799926758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,8,128,1,float16,float16,0,0.49797332286834717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,8,128,1,float16,fp8,0,0.5004746516545614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,1,128,1,float16,float16,0,0.25524266560872394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,96,8,128,1,fp8,fp8,0,0.5030080080032349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,1,128,1,float16,fp8,0,0.25421865781148273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,96,128,1,float16,fp8,0,0.30779733260472614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,96,128,1,fp8,fp8,0,0.29392000039418537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,1,128,1,fp8,fp8,0,0.2402613361676534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,2,128,1,float16,float16,0,0.2565653324127197
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,2,128,1,fp8,fp8,0,0.24597332874933878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,96,128,1,float16,float16,0,0.31757867336273193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,2,128,1,float16,fp8,0,0.2571626702944438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,4,128,1,float16,float16,0,0.2569173375765483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,4,128,1,float16,fp8,0,0.2576106588045756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,4,128,1,fp8,fp8,0,0.24838932355244955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,8,128,1,float16,float16,0,0.26057066520055133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,8,128,1,fp8,fp8,0,0.2585653265317281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,96,8,128,1,float16,fp8,0,0.26133867104848224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,1,128,1,float16,float16,0,0.13823466499646506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,1,128,1,float16,fp8,0,0.1379200021425883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,96,128,1,fp8,fp8,0,0.16100266575813293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,1,128,1,fp8,fp8,0,0.1329919993877411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,96,128,1,float16,float16,0,0.17084799210230509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,2,128,1,float16,float16,0,0.1378613313039144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,96,128,1,float16,fp8,0,0.16569599509239197
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,2,128,1,float16,fp8,0,0.13821333646774292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,2,128,1,fp8,fp8,0,0.135343998670578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,4,128,1,float16,float16,0,0.13854933778444925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,4,128,1,fp8,fp8,0,0.13486400246620178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,4,128,1,float16,fp8,0,0.13893866539001465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,8,128,1,float16,fp8,0,0.14059733351071677
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,8,128,1,float16,float16,0,0.14108799894650778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,96,8,128,1,fp8,fp8,0,0.13909332950909933
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,96,128,1,float16,fp8,0,0.10059199730555217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,96,128,1,float16,float16,0,0.1053013304869334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,1,128,1,float16,fp8,0,0.0784746656815211
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,1,128,1,float16,float16,0,0.07844799757003784
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,1,128,1,fp8,fp8,0,0.0738560010989507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,96,128,1,fp8,fp8,0,0.09089066584904988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,2,128,1,fp8,fp8,0,0.07486933469772339
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,2,128,1,float16,float16,0,0.0790880024433136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,2,128,1,float16,fp8,0,0.07919999957084656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,4,128,1,float16,fp8,0,0.07905066510041554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,4,128,1,float16,float16,0,0.07983466486136119
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,4,128,1,fp8,fp8,0,0.07508266468842824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,8,128,1,float16,float16,0,0.07984533409277599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,8,128,1,float16,fp8,0,0.08085866769154866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,96,8,128,1,fp8,fp8,0,0.07807466884454091
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,96,128,1,fp8,fp8,0,0.053317333261171974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,96,128,1,float16,float16,0,0.05644799768924713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,1,128,1,float16,fp8,0,0.04606399933497111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,1,128,1,fp8,fp8,0,0.042463997999827065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,1,128,1,float16,float16,0,0.04688533147176107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,2,128,1,float16,float16,0,0.04670399924119314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,96,128,1,float16,fp8,0,0.054917335510253906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,2,128,1,float16,fp8,0,0.046485334634780884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,2,128,1,fp8,fp8,0,0.04254400233427683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,4,128,1,float16,fp8,0,0.04659200211366018
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,4,128,1,fp8,fp8,0,0.04310933252175649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,4,128,1,float16,float16,0,0.04675200084845225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,8,128,1,float16,fp8,0,0.047295997540156044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,8,128,1,float16,float16,0,0.04713066418965658
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,96,128,1,float16,float16,0,0.036848001182079315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,96,8,128,1,fp8,fp8,0,0.043978666265805565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,96,128,1,float16,fp8,0,0.03626133253177007
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,96,128,1,fp8,fp8,0,0.035829332967599235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,1,128,1,float16,float16,0,0.033887999753157295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,1,128,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,2,128,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,1,128,1,float16,fp8,0,0.0335413341720899
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,2,128,1,float16,float16,0,0.033717334270477295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,4,128,1,float16,float16,0,0.033941333492596946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,2,128,1,fp8,fp8,0,0.03173333406448364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,4,128,1,float16,fp8,0,0.0336053321758906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,4,128,1,fp8,fp8,0,0.03173333406448364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,8,128,1,float16,float16,0,0.03427733232577642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,8,128,1,float16,fp8,0,0.03431999931732813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,96,8,128,1,fp8,fp8,0,0.03223466624816259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,1,128,1,float16,fp8,0,1.7110026677449544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,2,128,1,float16,fp8,0,1.732917308807373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,2,128,1,float16,float16,0,1.7325867017110188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,4,128,1,float16,float16,0,1.776144027709961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,1,128,1,float16,float16,0,1.7303147315979004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,1,128,1,fp8,fp8,0,1.8195734024047852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,2,128,1,fp8,fp8,0,1.8277014096577961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,4,128,1,float16,fp8,0,1.768330732981364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,1,128,1,float16,float16,0,0.8693280220031738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,4,128,1,fp8,fp8,0,1.865114688873291
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,8,128,1,float16,float16,0,1.802303949991862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,8,128,1,float16,fp8,0,1.792319933573405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,96,8,128,1,fp8,fp8,0,1.9708320299784343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,1,128,1,float16,fp8,0,0.8675786654154459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,96,128,1,float16,float16,0,1.1449493567148845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,1,128,1,fp8,fp8,0,0.8984159628550211
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,96,128,1,float16,fp8,0,1.1037973562876384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,96,128,1,fp8,fp8,0,1.1121333440144856
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,2,128,1,float16,float16,0,0.874613364537557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,2,128,1,float16,fp8,0,0.8758986790974935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,2,128,1,fp8,fp8,0,0.9201227029164633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,4,128,1,float16,float16,0,0.8840906620025635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,4,128,1,float16,fp8,0,0.887493371963501
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,4,128,1,fp8,fp8,0,0.9357759952545166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,8,128,1,float16,float16,0,0.9109066327412924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,8,128,1,float16,fp8,0,0.9046879609425863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,96,8,128,1,fp8,fp8,0,0.9945066769917806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,1,128,1,float16,float16,0,0.4463413159052531
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,1,128,1,float16,fp8,0,0.44379734992980957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,96,128,1,float16,float16,0,0.5752480030059814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,96,128,1,float16,fp8,0,0.5581760009129842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,1,128,1,fp8,fp8,0,0.45606398582458496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,2,128,1,float16,float16,0,0.44552000363667804
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,2,128,1,float16,fp8,0,0.4489440123240153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,2,128,1,fp8,fp8,0,0.46855465571085614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,4,128,1,float16,float16,0,0.4500906864802043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,4,128,1,float16,fp8,0,0.4517066478729248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,4,128,1,fp8,fp8,0,0.46859200795491535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,96,128,1,fp8,fp8,0,0.5669066508611044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,8,128,1,float16,float16,0,0.46061333020528156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,8,128,1,float16,fp8,0,0.45996801058451336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,96,8,128,1,fp8,fp8,0,0.5060533285140991
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,1,128,1,float16,float16,0,0.23572266101837158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,1,128,1,fp8,fp8,0,0.23877867062886557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,1,128,1,float16,fp8,0,0.23557867606480917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,2,128,1,float16,float16,0,0.23461333910624185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,96,128,1,float16,float16,0,0.30959467093149823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,96,128,1,float16,fp8,0,0.2977280020713806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,96,128,1,fp8,fp8,0,0.29309866825739544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,2,128,1,float16,fp8,0,0.23530133565266928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,2,128,1,fp8,fp8,0,0.2427039941151937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,4,128,1,float16,float16,0,0.23689599831899008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,4,128,1,float16,fp8,0,0.23828800519307455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,4,128,1,fp8,fp8,0,0.24542933702468872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,8,128,1,float16,float16,0,0.24299200375874838
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,8,128,1,float16,fp8,0,0.24143467346827188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,96,8,128,1,fp8,fp8,0,0.2578773299853007
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,1,128,1,float16,float16,0,0.12950399518013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,96,128,1,float16,float16,0,0.16965333620707193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,96,128,1,float16,fp8,0,0.1625599960486094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,1,128,1,float16,fp8,0,0.12949867049853006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,96,128,1,fp8,fp8,0,0.15865066647529602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,1,128,1,fp8,fp8,0,0.13117866714795431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,2,128,1,float16,float16,0,0.12873066465059915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,2,128,1,float16,fp8,0,0.12931733330090842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,4,128,1,float16,float16,0,0.1304373343785604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,4,128,1,float16,fp8,0,0.13146666685740152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,4,128,1,fp8,fp8,0,0.13362133502960205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,2,128,1,fp8,fp8,0,0.1341493328412374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,8,128,1,float16,float16,0,0.13173333803812662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,8,128,1,fp8,fp8,0,0.13864533106486002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,96,8,128,1,float16,fp8,0,0.13341333468755087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,96,128,1,float16,float16,0,0.0978559950987498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,1,128,1,float16,float16,0,0.07374399900436401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,1,128,1,fp8,fp8,0,0.07369600236415863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,2,128,1,float16,float16,0,0.07333866755167644
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,96,128,1,float16,fp8,0,0.09470400214195251
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,96,128,1,fp8,fp8,0,0.08960533142089844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,2,128,1,float16,fp8,0,0.0740533322095871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,1,128,1,float16,fp8,0,0.07408000032107036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,2,128,1,fp8,fp8,0,0.07401599983374278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,4,128,1,float16,float16,0,0.07419200241565704
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,4,128,1,float16,fp8,0,0.07420266668001811
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,4,128,1,fp8,fp8,0,0.07503466804822286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,8,128,1,float16,float16,0,0.07472000022729237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,8,128,1,float16,fp8,0,0.07501866420110066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,96,8,128,1,fp8,fp8,0,0.07758399844169617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,96,128,1,float16,fp8,0,0.052799999713897705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,96,128,1,float16,float16,0,0.05504000186920166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,96,128,1,fp8,fp8,0,0.05324266850948334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,1,128,1,float16,float16,0,0.04426133135954539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,1,128,1,float16,fp8,0,0.04359999795754751
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,1,128,1,fp8,fp8,0,0.04238933324813843
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,2,128,1,float16,float16,0,0.043882668018341064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,2,128,1,float16,fp8,0,0.04418133199214935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,2,128,1,fp8,fp8,0,0.042410666743914284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,4,128,1,float16,float16,0,0.04465066889921824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,4,128,1,float16,fp8,0,0.04435733457406362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,4,128,1,fp8,fp8,0,0.04279999931653341
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,8,128,1,float16,float16,0,0.04461866617202759
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,8,128,1,float16,fp8,0,0.04479999840259552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,96,8,128,1,fp8,fp8,0,0.044362664222717285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,96,128,1,float16,float16,0,0.03565866748491923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,96,128,1,float16,fp8,0,0.03608000030120214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,1,128,1,float16,float16,0,0.033088001112143196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,1,128,1,float16,fp8,0,0.03279466678698858
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,96,128,1,fp8,fp8,0,0.03481066723664602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,1,128,1,fp8,fp8,0,0.03173866619666418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,2,128,1,float16,float16,0,0.03272533416748047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,2,128,1,float16,fp8,0,0.033488000432650246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,2,128,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,4,128,1,float16,float16,0,0.03294399877389272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,4,128,1,float16,fp8,0,0.03295466552178065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,4,128,1,fp8,fp8,0,0.03177600105603536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,8,128,1,float16,float16,0,0.03332266708215078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,8,128,1,float16,fp8,0,0.033215999603271484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,96,8,128,1,fp8,fp8,0,0.031712000568707786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,96,128,1,float16,float16,0,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,96,128,1,float16,fp8,0,0.024853333830833435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,96,128,1,fp8,fp8,0,0.024634666740894318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,1,128,1,float16,float16,0,0.022991999983787537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,1,128,1,float16,fp8,0,0.023498666783173878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,1,128,1,fp8,fp8,0,0.022490667800108593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,2,128,1,float16,float16,0,0.023103999594847362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,2,128,1,float16,fp8,0,0.02349333216746648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,4,128,1,float16,float16,0,0.023573334018389385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,2,128,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,4,128,1,float16,fp8,0,0.023386667172114056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,4,128,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,8,128,1,float16,float16,0,0.0232640008131663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,8,128,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,96,8,128,1,fp8,fp8,0,0.022890667120615642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,1,128,1,float16,float16,0,1.1065759658813477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,1,128,1,fp8,fp8,0,1.190335988998413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,2,128,1,float16,fp8,0,1.1195627053578694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,2,128,1,float16,float16,0,1.1184266408284504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,1,128,1,float16,fp8,0,1.1054506301879883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,2,128,1,fp8,fp8,0,1.214080015818278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,4,128,1,float16,float16,0,1.1223999659220378
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,4,128,1,float16,fp8,0,1.1233226458231609
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,1,128,1,float16,float16,0,0.5624159971872965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,4,128,1,fp8,fp8,0,1.2348106702168782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,8,128,1,float16,float16,0,1.1510079701741536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,8,128,1,float16,fp8,0,1.1671040058135986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,96,8,128,1,fp8,fp8,0,1.309653361638387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,1,128,1,float16,fp8,0,0.5615093310674032
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,96,128,1,float16,float16,0,0.64573868115743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,96,128,1,float16,fp8,0,0.6286506652832031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,96,128,1,fp8,fp8,0,0.719264030456543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,2,128,1,float16,float16,0,0.5691519975662231
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,1,128,1,fp8,fp8,0,0.6044266621271769
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,2,128,1,fp8,fp8,0,0.6178293228149414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,2,128,1,float16,fp8,0,0.5686560074488322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,4,128,1,float16,float16,0,0.5672906637191772
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,4,128,1,float16,fp8,0,0.5703519980112711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,4,128,1,fp8,fp8,0,0.6250026623407999
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,8,128,1,float16,float16,0,0.5810133218765259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,8,128,1,float16,fp8,0,0.5764106512069702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,96,8,128,1,fp8,fp8,0,0.6660053332646688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,1,128,1,float16,float16,0,0.29201066493988037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,1,128,1,float16,fp8,0,0.2912213404973348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,96,128,1,float16,float16,0,0.335647980372111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,96,128,1,float16,fp8,0,0.3245973388353984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,1,128,1,fp8,fp8,0,0.3148266673088074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,2,128,1,float16,float16,0,0.2932800054550171
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,96,128,1,fp8,fp8,0,0.3718666632970174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,2,128,1,float16,fp8,0,0.29315733909606934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,4,128,1,float16,float16,0,0.29385600487391156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,2,128,1,fp8,fp8,0,0.3200053373972575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,4,128,1,float16,fp8,0,0.2940640052159627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,4,128,1,fp8,fp8,0,0.32390934228897095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,8,128,1,float16,fp8,0,0.2981333335240682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,8,128,1,fp8,fp8,0,0.33983465035756427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,96,8,128,1,float16,float16,0,0.2985706726710002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,96,128,1,float16,float16,0,0.17896533012390137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,1,128,1,float16,float16,0,0.15505599975585938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,1,128,1,float16,fp8,0,0.15602667133013406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,96,128,1,float16,fp8,0,0.17429866393407187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,1,128,1,fp8,fp8,0,0.17089066902796426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,96,128,1,fp8,fp8,0,0.19681066274642944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,2,128,1,float16,float16,0,0.1560426652431488
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,2,128,1,float16,fp8,0,0.15650666753451029
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,2,128,1,fp8,fp8,0,0.1715839902559916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,4,128,1,float16,float16,0,0.15626666943232217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,4,128,1,float16,fp8,0,0.15827199816703796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,4,128,1,fp8,fp8,0,0.17269333203633627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,8,128,1,fp8,fp8,0,0.17575999101003012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,8,128,1,float16,float16,0,0.15893333156903586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,96,8,128,1,float16,fp8,0,0.15849600235621134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,96,128,1,float16,float16,0,0.09924800197283427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,1,128,1,float16,float16,0,0.08601066470146179
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,1,128,1,float16,fp8,0,0.08660800258318584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,96,128,1,fp8,fp8,0,0.10745066404342651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,96,128,1,float16,fp8,0,0.09619200229644775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,1,128,1,fp8,fp8,0,0.09368000427881877
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,2,128,1,float16,fp8,0,0.08676266670227051
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,2,128,1,fp8,fp8,0,0.09334400296211243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,4,128,1,float16,float16,0,0.08664000034332275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,2,128,1,float16,float16,0,0.08642133076985677
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,4,128,1,float16,fp8,0,0.08721599976221721
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,4,128,1,fp8,fp8,0,0.09503466884295146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,8,128,1,float16,float16,0,0.08706667025883992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,8,128,1,float16,fp8,0,0.08820266524950664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,96,8,128,1,fp8,fp8,0,0.09779199957847595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,96,128,1,float16,float16,0,0.05738666653633118
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,1,128,1,float16,float16,0,0.050101334849993386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,96,128,1,float16,fp8,0,0.055445333321889244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,1,128,1,float16,fp8,0,0.04996266464392344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,1,128,1,fp8,fp8,0,0.05285866558551788
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,96,128,1,fp8,fp8,0,0.06299200157324474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,2,128,1,float16,float16,0,0.04971733192602793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,2,128,1,float16,fp8,0,0.05004266897837321
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,2,128,1,fp8,fp8,0,0.05345066885153452
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,4,128,1,float16,fp8,0,0.050613333781560264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,4,128,1,float16,float16,0,0.04997866849104563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,4,128,1,fp8,fp8,0,0.05310933291912079
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,8,128,1,float16,float16,0,0.050554667909940086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,8,128,1,float16,fp8,0,0.05126399795214335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,96,8,128,1,fp8,fp8,0,0.05469333132108053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,96,128,1,float16,float16,0,0.034389334420363106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,1,128,1,float16,float16,0,0.03336533407370249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,1,128,1,float16,fp8,0,0.033546666304270424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,96,128,1,fp8,fp8,0,0.03860799968242645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,1,128,1,fp8,fp8,0,0.034671999514102936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,2,128,1,float16,float16,0,0.033301333586374916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,2,128,1,float16,fp8,0,0.033701332906881966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,96,128,1,float16,fp8,0,0.03482666611671448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,2,128,1,fp8,fp8,0,0.035045333206653595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,4,128,1,float16,float16,0,0.03347733368476232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,4,128,1,float16,fp8,0,0.0339626669883728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,4,128,1,fp8,fp8,0,0.03530666728814443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,8,128,1,float16,float16,0,0.03344533344109853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,8,128,1,fp8,fp8,0,0.03537066777547201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,96,8,128,1,float16,fp8,0,0.033733333150545754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,96,128,1,float16,float16,0,0.02649066597223282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,96,128,1,float16,fp8,0,0.02643200010061264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,96,128,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,1,128,1,float16,float16,0,0.025386666258176167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,1,128,1,float16,fp8,0,0.025610665480295818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,1,128,1,fp8,fp8,0,0.026378666361172993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,2,128,1,float16,float16,0,0.025818665822347004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,2,128,1,float16,fp8,0,0.025733334322770435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,2,128,1,fp8,fp8,0,0.026901334524154663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,4,128,1,float16,float16,0,0.025493333737055462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,4,128,1,float16,fp8,0,0.025605333348115284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,4,128,1,fp8,fp8,0,0.02651199946800868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,8,128,1,float16,float16,0,0.02584533393383026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,8,128,1,fp8,fp8,0,0.026874666412671406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,96,8,128,1,float16,fp8,0,0.025802666942278545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,96,128,1,float16,fp8,0,0.01978133370478948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,96,128,1,float16,float16,0,0.019845332950353622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,96,128,1,fp8,fp8,0,0.020645332833131153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,1,128,1,float16,fp8,0,0.018522666146357853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,1,128,1,float16,float16,0,0.018794666975736618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,1,128,1,fp8,fp8,0,0.019941333681344986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,2,128,1,float16,float16,0,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,2,128,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,2,128,1,fp8,fp8,0,0.01985599969824155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,4,128,1,float16,float16,0,0.01899733394384384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,4,128,1,fp8,fp8,0,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,4,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,8,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,8,128,1,float16,float16,0,0.018992000569899876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,96,8,128,1,fp8,fp8,0,0.01993600030740102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,1,128,1,float16,float16,0,0.7486879825592041
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,1,128,1,fp8,fp8,0,0.9585599899291992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,2,128,1,float16,float16,0,0.7613706588745117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,2,128,1,fp8,fp8,0,0.9687306880950928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,1,128,1,float16,fp8,0,0.7513759930928549
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,2,128,1,float16,fp8,0,0.765893300374349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,4,128,1,float16,float16,0,0.7740106582641602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,4,128,1,float16,fp8,0,0.7744692961374918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,1,128,1,float16,float16,0,0.3843573331832886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,8,128,1,float16,float16,0,0.7881066799163818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,4,128,1,fp8,fp8,0,0.9739413261413574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,8,128,1,float16,fp8,0,0.7900479634602865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,96,8,128,1,fp8,fp8,0,1.0156373182932537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,96,128,1,float16,fp8,0,0.420906662940979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,1,128,1,float16,fp8,0,0.3847839832305908
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,96,128,1,float16,float16,0,0.4307573239008586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,96,128,1,fp8,fp8,0,0.5445653200149536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,1,128,1,fp8,fp8,0,0.49139201641082764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,2,128,1,float16,float16,0,0.3900373379389445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,2,128,1,float16,fp8,0,0.3904000123341878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,2,128,1,fp8,fp8,0,0.4987306594848633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,4,128,1,float16,float16,0,0.3956853151321411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,4,128,1,float16,fp8,0,0.39585065841674805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,4,128,1,fp8,fp8,0,0.4994666576385498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,8,128,1,float16,float16,0,0.40380267302195233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,8,128,1,float16,fp8,0,0.4041866858800252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,96,8,128,1,fp8,fp8,0,0.5164479811986288
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,96,128,1,float16,float16,0,0.22718934218088785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,1,128,1,float16,fp8,0,0.20129066705703735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,1,128,1,float16,float16,0,0.20085332791010538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,1,128,1,fp8,fp8,0,0.25808000564575195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,96,128,1,float16,fp8,0,0.22084800402323404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,2,128,1,float16,float16,0,0.20363199710845947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,96,128,1,fp8,fp8,0,0.2824373245239258
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,2,128,1,float16,fp8,0,0.2049973408381144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,2,128,1,fp8,fp8,0,0.2600533366203308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,4,128,1,float16,float16,0,0.20618132750193277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,4,128,1,float16,fp8,0,0.2076693375905355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,4,128,1,fp8,fp8,0,0.26083733638127643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,8,128,1,float16,float16,0,0.21104000012079874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,8,128,1,float16,fp8,0,0.2103839914004008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,96,128,1,float16,float16,0,0.12227200468381245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,96,8,128,1,fp8,fp8,0,0.26506133874257404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,1,128,1,float16,float16,0,0.10750400026639302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,96,128,1,float16,fp8,0,0.1192586620648702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,1,128,1,float16,fp8,0,0.10838400324185689
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,1,128,1,fp8,fp8,0,0.1405333379904429
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,2,128,1,float16,float16,0,0.10831466317176819
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,96,128,1,fp8,fp8,0,0.15346666177113852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,2,128,1,float16,fp8,0,0.10782933235168457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,2,128,1,fp8,fp8,0,0.1404906709988912
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,4,128,1,float16,float16,0,0.10803199807802837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,4,128,1,float16,fp8,0,0.10853866736094157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,4,128,1,fp8,fp8,0,0.14124266306559244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,8,128,1,float16,float16,0,0.1097813347975413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,8,128,1,float16,fp8,0,0.1095360020796458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,96,8,128,1,fp8,fp8,0,0.14388799667358398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,96,128,1,float16,float16,0,0.06912533442179362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,96,128,1,float16,fp8,0,0.06703466673692067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,1,128,1,float16,float16,0,0.060736000537872314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,1,128,1,float16,fp8,0,0.06091199815273285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,96,128,1,fp8,fp8,0,0.0860586663087209
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,1,128,1,fp8,fp8,0,0.07763733466466267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,2,128,1,float16,float16,0,0.06118399898211161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,2,128,1,fp8,fp8,0,0.07695466776688893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,4,128,1,float16,float16,0,0.06113600234190623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,2,128,1,float16,fp8,0,0.06126933296521505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,4,128,1,float16,fp8,0,0.06124266485373179
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,4,128,1,fp8,fp8,0,0.0775733341773351
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,8,128,1,float16,float16,0,0.061749334136645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,8,128,1,float16,fp8,0,0.06180266539255778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,96,8,128,1,fp8,fp8,0,0.07840533554553986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,96,128,1,float16,float16,0,0.03933866570393244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,96,128,1,float16,fp8,0,0.04018666595220566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,1,128,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,1,128,1,fp8,fp8,0,0.046725332736968994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,2,128,1,float16,float16,0,0.03938666731119156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,2,128,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,1,128,1,float16,float16,0,0.03942933430274328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,96,128,1,fp8,fp8,0,0.050341332952181496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,2,128,1,fp8,fp8,0,0.04716266691684723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,4,128,1,float16,fp8,0,0.039621333281199135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,4,128,1,float16,float16,0,0.03948266555865606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,4,128,1,fp8,fp8,0,0.04669866462548574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,8,128,1,float16,float16,0,0.03952533255020777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,8,128,1,float16,fp8,0,0.03959999978542328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,96,8,128,1,fp8,fp8,0,0.04734933376312256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,96,128,1,float16,fp8,0,0.028058665494124096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,96,128,1,fp8,fp8,0,0.032730666299661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,1,128,1,float16,fp8,0,0.0268053337931633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,96,128,1,float16,float16,0,0.0276853342851003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,1,128,1,float16,float16,0,0.027066667874654133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,2,128,1,float16,float16,0,0.027221334477265675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,1,128,1,fp8,fp8,0,0.031002665559450786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,2,128,1,float16,fp8,0,0.026869334280490875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,2,128,1,fp8,fp8,0,0.030960001051425934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,4,128,1,float16,float16,0,0.026629333694775898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,4,128,1,float16,fp8,0,0.0271573339899381
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,8,128,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,8,128,1,float16,float16,0,0.02717866748571396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,8,128,1,fp8,fp8,0,0.03193599979082743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,96,4,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,96,128,1,float16,float16,0,0.02141333371400833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,96,128,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,96,128,1,fp8,fp8,0,0.024314666787783306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,1,128,1,float16,float16,0,0.021173333128293354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,1,128,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,1,128,1,fp8,fp8,0,0.02383466561635335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,2,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,2,128,1,fp8,fp8,0,0.023973333338896435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,4,128,1,float16,float16,0,0.021114667256673176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,4,128,1,float16,fp8,0,0.02161066730817159
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,8,128,1,float16,float16,0,0.02162133405605952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,4,128,1,fp8,fp8,0,0.024031999210516613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,2,128,1,float16,fp8,0,0.021568000316619873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,8,128,1,float16,fp8,0,0.02163733293612798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,96,8,128,1,fp8,fp8,0,0.024154665569464367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,96,128,1,float16,float16,0,0.017488000293572743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,96,128,1,fp8,fp8,0,0.01977066695690155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,96,128,1,float16,fp8,0,0.0176959993938605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,1,128,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,1,128,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,2,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,2,128,1,float16,fp8,0,0.017877332866191864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,2,128,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,1,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,4,128,1,float16,float16,0,0.017621333400408428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,4,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,4,128,1,float16,fp8,0,0.017637333522240322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,8,128,1,float16,float16,0,0.017360000560681026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,8,128,1,float16,fp8,0,0.018058666338523228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,96,8,128,1,fp8,fp8,0,0.019946667055288952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,1,128,1,float16,float16,0,0.6405973434448242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,1,128,1,float16,fp8,0,0.6397386789321899
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,1,128,1,fp8,fp8,0,0.8462506930033366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,2,128,1,float16,float16,0,0.6442933479944865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,2,128,1,fp8,fp8,0,0.8536053498586019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,2,128,1,float16,fp8,0,0.6475893259048462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,4,128,1,float16,float16,0,0.6509439945220947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,4,128,1,float16,fp8,0,0.6499626636505127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,4,128,1,fp8,fp8,0,0.8530826568603516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,1,128,1,float16,float16,0,0.32846399148305255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,8,128,1,float16,fp8,0,0.6606186628341675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,8,128,1,fp8,fp8,0,0.876261313756307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,96,8,128,1,float16,float16,0,0.6590346495310465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,96,128,1,float16,float16,0,0.33949331442515057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,96,128,1,fp8,fp8,0,0.45610666275024414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,96,128,1,float16,fp8,0,0.331386665503184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,1,128,1,float16,fp8,0,0.32887999216715497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,1,128,1,fp8,fp8,0,0.43613334496816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,2,128,1,float16,float16,0,0.3315253257751465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,2,128,1,float16,fp8,0,0.33265066146850586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,4,128,1,float16,float16,0,0.3340533177057902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,2,128,1,fp8,fp8,0,0.4376480182011922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,4,128,1,float16,fp8,0,0.3349279959996541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,4,128,1,fp8,fp8,0,0.43727465470631915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,8,128,1,float16,float16,0,0.3384480079015096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,8,128,1,float16,fp8,0,0.33742932478586835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,96,8,128,1,fp8,fp8,0,0.4434986511866252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,96,128,1,float16,float16,0,0.1760746637980143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,1,128,1,float16,float16,0,0.17140267292658487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,1,128,1,float16,fp8,0,0.17173333962758383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,96,128,1,float16,fp8,0,0.17296000321706137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,96,128,1,fp8,fp8,0,0.24065067370732626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,1,128,1,fp8,fp8,0,0.22929600874582926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,2,128,1,float16,float16,0,0.17172267039616904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,2,128,1,float16,fp8,0,0.1716853380203247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,2,128,1,fp8,fp8,0,0.23138666152954102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,4,128,1,float16,float16,0,0.17179733514785767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,4,128,1,float16,fp8,0,0.1718613306681315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,4,128,1,fp8,fp8,0,0.23115734259287515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,8,128,1,float16,fp8,0,0.17308799425760904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,8,128,1,fp8,fp8,0,0.23403199513753256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,96,8,128,1,float16,float16,0,0.1728853384653727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,1,128,1,float16,float16,0,0.09243733684221904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,96,128,1,float16,float16,0,0.09567466378211975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,96,128,1,float16,fp8,0,0.09500267108281453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,1,128,1,float16,fp8,0,0.09303466478983562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,96,128,1,fp8,fp8,0,0.1321440041065216
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,1,128,1,fp8,fp8,0,0.12337066729863484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,2,128,1,float16,float16,0,0.09334933757781982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,2,128,1,fp8,fp8,0,0.12275733550389607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,4,128,1,float16,float16,0,0.09337600072224934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,4,128,1,float16,fp8,0,0.0937600036462148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,4,128,1,fp8,fp8,0,0.1239413321018219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,2,128,1,float16,fp8,0,0.09309333562850952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,8,128,1,float16,fp8,0,0.09398933251698811
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,8,128,1,float16,float16,0,0.09346666932106018
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,96,8,128,1,fp8,fp8,0,0.1250933309396108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,1,128,1,float16,float16,0,0.05539200206597646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,96,128,1,float16,float16,0,0.0543093333641688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,96,128,1,float16,fp8,0,0.0543039987484614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,96,128,1,fp8,fp8,0,0.07393600046634674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,1,128,1,float16,fp8,0,0.05608533322811127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,1,128,1,fp8,fp8,0,0.07042666773001353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,2,128,1,float16,fp8,0,0.05578133463859558
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,2,128,1,float16,float16,0,0.05610666672388712
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,2,128,1,fp8,fp8,0,0.07082133491834004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,4,128,1,float16,float16,0,0.05606399973233541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,4,128,1,float16,fp8,0,0.05658133327960968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,4,128,1,fp8,fp8,0,0.07124799986680348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,8,128,1,float16,float16,0,0.05594133337338766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,8,128,1,float16,fp8,0,0.05587733288606008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,96,8,128,1,fp8,fp8,0,0.07154666880766551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,96,128,1,float16,float16,0,0.03475199888149897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,96,128,1,fp8,fp8,0,0.04445866743723551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,96,128,1,float16,fp8,0,0.03519999980926514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,1,128,1,float16,fp8,0,0.03605866680542628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,1,128,1,float16,float16,0,0.035360001027584076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,1,128,1,fp8,fp8,0,0.04367466767628988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,2,128,1,float16,float16,0,0.035386666655540466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,2,128,1,float16,fp8,0,0.0360959991812706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,2,128,1,fp8,fp8,0,0.04331733286380768
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,4,128,1,float16,float16,0,0.03506666670242945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,4,128,1,float16,fp8,0,0.035743998984495796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,4,128,1,fp8,fp8,0,0.04320533573627472
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,8,128,1,float16,float16,0,0.03579200059175491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,8,128,1,float16,fp8,0,0.03614933292071024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,96,8,128,1,fp8,fp8,0,0.04366933306058248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,96,128,1,float16,float16,0,0.02478400121132533
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,96,128,1,float16,fp8,0,0.02439466615517934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,96,128,1,fp8,fp8,0,0.029189333319664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,1,128,1,float16,fp8,0,0.024197332561016083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,1,128,1,float16,float16,0,0.023738667368888855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,1,128,1,fp8,fp8,0,0.02790933350721995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,2,128,1,float16,float16,0,0.02437866727511088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,2,128,1,float16,fp8,0,0.024864000578721363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,2,128,1,fp8,fp8,0,0.028357334434986115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,4,128,1,float16,float16,0,0.024517332514127094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,4,128,1,float16,fp8,0,0.02456533412138621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,4,128,1,fp8,fp8,0,0.02849599967400233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,8,128,1,float16,float16,0,0.02404800057411194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,8,128,1,float16,fp8,0,0.0242399995525678
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,96,8,128,1,fp8,fp8,0,0.028336000939210255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,96,128,1,float16,float16,0,0.019925333559513092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,96,128,1,float16,fp8,0,0.020703999946514767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,96,128,1,fp8,fp8,0,0.024725332856178284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,1,128,1,float16,float16,0,0.020725333442290623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,1,128,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,1,128,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,2,128,1,float16,float16,0,0.02021866664290428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,2,128,1,float16,fp8,0,0.02085866779088974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,2,128,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,4,128,1,float16,float16,0,0.02092266579469045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,4,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,4,128,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,8,128,1,float16,float16,0,0.02073066681623459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,8,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,96,8,128,1,fp8,fp8,0,0.02362666775782903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,96,128,1,float16,float16,0,0.016789333273967106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,96,128,1,float16,fp8,0,0.017605333278576534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,96,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,1,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,1,128,1,float16,float16,0,0.01670933390657107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,1,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,2,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,2,128,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,4,128,1,float16,float16,0,0.016693333784739178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,4,128,1,float16,fp8,0,0.017610666652520496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,8,128,1,float16,fp8,0,0.017792000124851864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,4,128,1,fp8,fp8,0,0.01863466699918111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,2,128,1,fp8,fp8,0,0.018351999421914417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,8,128,1,float16,float16,0,0.016634666671355564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,96,8,128,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,128,1,fp8,fp8,0,44.0331064860026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,128,1,float16,float16,0,74.75494384765625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,128,1,float16,fp8,0,74.96734110514323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,128,1,fp8,fp8,0,44.624796549479164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,128,1,float16,fp8,0,75.73423767089844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,128,1,float16,fp8,0,74.59018961588542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,128,1,float16,float16,0,75.57962544759114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,128,1,float16,float16,0,78.74204508463542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,128,1,float16,float16,0,34.089637756347656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,128,1,fp8,fp8,0,44.07918802897135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,128,1,fp8,fp8,0,44.7063954671224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,128,1,float16,float16,0,78.56776936848958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,128,1,float16,fp8,0,78.22949727376302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,128,1,float16,float16,0,33.27606455485026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,128,1,float16,fp8,0,34.06945546468099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,128,1,float16,fp8,0,33.21042124430338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,128,1,fp8,fp8,0,22.57593536376953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,128,1,fp8,fp8,0,22.186383565266926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,128,1,float16,fp8,0,33.3154551188151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,128,1,float16,float16,0,33.944836934407554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,128,1,fp8,fp8,0,22.298182169596355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,128,1,float16,float16,0,33.227396647135414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,128,1,fp8,fp8,0,22.19176991780599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,128,1,float16,fp8,0,34.084208170572914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,128,1,float16,float16,0,33.68488057454427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,128,1,float16,fp8,0,33.26411692301432
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,128,1,float16,float16,0,16.81410090128581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,128,1,float16,fp8,0,17.14466603597005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,128,1,fp8,fp8,0,22.123573303222656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,128,1,fp8,fp8,0,11.301317850748697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,128,1,float16,float16,0,17.111461639404297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,128,1,float16,fp8,0,16.77364222208659
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,128,1,float16,float16,0,16.83572260538737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,128,1,float16,fp8,0,16.817972819010418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,128,1,fp8,fp8,0,11.478858947753906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,128,1,fp8,fp8,0,11.371920267740885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,128,1,float16,float16,0,16.850096384684246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,128,1,float16,fp8,0,16.87828826904297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,128,1,fp8,fp8,0,11.265605926513672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,128,1,float16,float16,0,16.940692901611328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,128,1,float16,float16,0,8.55894915262858
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,128,1,float16,fp8,0,16.742027282714844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,128,1,fp8,fp8,0,11.347551981608072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,128,1,float16,fp8,0,8.62661361694336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,128,1,fp8,fp8,0,5.717152277628581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,128,1,float16,float16,0,8.7980105082194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,128,1,float16,fp8,0,8.50050163269043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,128,1,float16,float16,0,8.698783874511719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,128,1,fp8,fp8,0,5.858095804850261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,128,1,float16,fp8,0,8.530464172363281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,128,1,fp8,fp8,0,5.718613306681315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,128,1,float16,float16,0,8.730341593424479
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,128,1,float16,fp8,0,8.50502904256185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,128,1,fp8,fp8,0,5.866992314656575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,128,1,float16,float16,0,8.5306027730306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,128,1,fp8,fp8,0,5.839365641276042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,128,1,float16,fp8,0,8.620794932047525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,128,1,float16,float16,0,38.298693339029946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,128,1,float16,fp8,0,38.346832275390625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,128,1,fp8,fp8,0,26.247238159179688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,128,1,fp8,fp8,0,25.713333129882812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,128,1,float16,float16,0,38.601318359375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,128,1,float16,fp8,0,39.30232493082682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,128,1,float16,float16,0,38.47209676106771
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,128,1,float16,fp8,0,38.67540740966797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,128,1,float16,float16,0,19.529200236002605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,128,1,fp8,fp8,0,25.83563232421875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,128,1,float16,float16,0,38.477638244628906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,128,1,fp8,fp8,0,25.888084411621094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,128,1,float16,fp8,0,39.177711486816406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,128,1,float16,fp8,0,19.104175567626953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,128,1,float16,float16,0,19.556223551432293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,128,1,fp8,fp8,0,13.137797037760416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,128,1,fp8,fp8,0,13.348485310872396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,128,1,float16,float16,0,19.19590886433919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,128,1,float16,fp8,0,19.229403177897137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,128,1,float16,fp8,0,19.870624542236328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,128,1,fp8,fp8,0,13.12445322672526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,128,1,float16,float16,0,19.41856511433919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,128,1,float16,fp8,0,19.29574966430664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,128,1,fp8,fp8,0,13.123563130696615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,128,1,fp8,fp8,0,13.09759521484375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,128,1,float16,float16,0,9.849429448445639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,128,1,float16,fp8,0,19.406021118164062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,128,1,float16,float16,0,19.029844919840496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,128,1,float16,fp8,0,9.774101257324219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,128,1,float16,float16,0,10.188671747843424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,128,1,fp8,fp8,0,6.77076784769694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,128,1,float16,fp8,0,9.84608523050944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,128,1,fp8,fp8,0,6.645253499348958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,128,1,float16,float16,0,9.79034678141276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,128,1,float16,fp8,0,9.732538859049479
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,128,1,fp8,fp8,0,6.429440180460612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,128,1,float16,float16,0,9.902522404988607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,128,1,float16,fp8,0,10.066991806030273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,128,1,fp8,fp8,0,6.691669464111328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,128,1,float16,float16,0,10.013071695963541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,128,1,float16,fp8,0,9.763231913248697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,128,1,float16,fp8,0,4.932997385660808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,128,1,float16,float16,0,5.089946746826172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,128,1,fp8,fp8,0,6.624117533365886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,128,1,fp8,fp8,0,3.407989184061686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,128,1,float16,float16,0,4.866906801859538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,128,1,float16,fp8,0,4.918688138326009
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,128,1,fp8,fp8,0,3.4753974278767905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,128,1,float16,float16,0,4.933247884114583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,128,1,float16,fp8,0,4.9964907964070635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,128,1,fp8,fp8,0,3.41105588277181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,128,1,float16,float16,0,4.913205464680989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,128,1,fp8,fp8,0,3.411653200785319
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,128,1,float16,float16,0,4.998213450113933
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,128,1,float16,fp8,0,4.998122533162435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,128,1,float16,fp8,0,4.995541254679362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,128,1,fp8,fp8,0,3.4108959833780923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,128,1,fp8,fp8,0,18.476917266845703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,128,1,float16,float16,0,27.352010091145832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,128,1,float16,fp8,0,27.74822998046875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,128,1,float16,float16,0,27.271705627441406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,128,1,float16,float16,0,27.32306671142578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,128,1,float16,fp8,0,27.01666768391927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,128,1,float16,fp8,0,27.148640950520832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,128,1,fp8,fp8,0,18.910634358723957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,128,1,fp8,fp8,0,18.44549814860026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,128,1,float16,float16,0,27.334490458170574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,128,1,float16,float16,0,13.914661407470703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,128,1,fp8,fp8,0,18.630255381266277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,128,1,float16,fp8,0,27.91979726155599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,128,1,float16,fp8,0,13.669578552246094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,128,1,float16,float16,0,13.72561009724935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,128,1,float16,fp8,0,13.77828852335612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,128,1,fp8,fp8,0,9.58900260925293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,128,1,fp8,fp8,0,9.42464510599772
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,128,1,float16,float16,0,13.952128092447916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,128,1,float16,fp8,0,13.720938364664713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,128,1,fp8,fp8,0,9.478991826375326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,128,1,float16,float16,0,13.6867307027181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,128,1,float16,fp8,0,13.598997751871744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,128,1,fp8,fp8,0,9.25054931640625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,128,1,float16,fp8,0,13.738309224446615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,128,1,float16,float16,0,13.66542943318685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,128,1,fp8,fp8,0,9.410949071248373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,128,1,float16,fp8,0,6.656362533569336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,128,1,float16,float16,0,6.77188237508138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,128,1,float16,float16,0,6.9676157633463545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,128,1,fp8,fp8,0,4.648373285929362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,128,1,float16,fp8,0,7.0223948160807295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,128,1,float16,float16,0,6.779621124267578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,128,1,float16,fp8,0,6.8057810465494795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,128,1,fp8,fp8,0,4.714165369669597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,128,1,fp8,fp8,0,4.868629455566406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,128,1,float16,float16,0,6.912581125895183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,128,1,fp8,fp8,0,4.732837359110515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,128,1,float16,fp8,0,6.846538543701172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,128,1,float16,fp8,0,6.794015884399414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,128,1,float16,float16,0,7.079482396443685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,128,1,fp8,fp8,0,4.780831972757976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,128,1,float16,fp8,0,3.423797289530436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,128,1,float16,float16,0,3.4715092976888022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,128,1,float16,float16,0,3.4696852366129556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,128,1,fp8,fp8,0,2.5007786750793457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,128,1,float16,float16,0,3.5120372772216797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,128,1,float16,fp8,0,3.4907251993815103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,128,1,fp8,fp8,0,2.543935934702555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,128,1,fp8,fp8,0,2.5096054077148438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,128,1,float16,fp8,0,3.466464042663574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,128,1,float16,float16,0,3.4764534632364907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,128,1,float16,fp8,0,3.4412320454915366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,128,1,fp8,fp8,0,2.504650592803955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,128,1,float16,float16,0,3.578831990559896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,128,1,float16,fp8,0,3.433173179626465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,128,1,fp8,fp8,0,2.5073599815368652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,128,1,fp8,fp8,0,24.829620361328125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,128,1,float16,float16,0,37.1253662109375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,128,1,float16,fp8,0,37.2841542561849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,128,1,float16,fp8,0,37.193834940592446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,128,1,float16,float16,0,37.259246826171875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,128,1,fp8,fp8,0,25.14525858561198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,128,1,float16,fp8,0,37.38560485839844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,128,1,float16,float16,0,37.75719451904297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,128,1,float16,float16,0,18.003663380940754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,128,1,fp8,fp8,0,24.898602803548176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,128,1,float16,float16,0,38.50103505452474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,128,1,float16,fp8,0,17.783653259277344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,128,1,fp8,fp8,0,24.908353169759113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,128,1,float16,fp8,0,38.46823374430338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,128,1,float16,float16,0,18.09279505411784
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,128,1,float16,fp8,0,18.48950449625651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,128,1,fp8,fp8,0,12.456539154052734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,128,1,fp8,fp8,0,12.991413116455078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,128,1,float16,float16,0,18.02787144978841
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,128,1,float16,fp8,0,17.93017069498698
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,128,1,fp8,fp8,0,12.417935689290365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,128,1,float16,float16,0,18.3951416015625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,128,1,float16,fp8,0,17.838069915771484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,128,1,fp8,fp8,0,12.76381810506185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,128,1,float16,float16,0,18.090234120686848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,128,1,float16,fp8,0,17.870421091715496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,128,1,float16,float16,0,8.945199966430664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,128,1,float16,fp8,0,9.131952285766602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,128,1,fp8,fp8,0,12.519087473551432
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,128,1,fp8,fp8,0,6.173285166422526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,128,1,float16,float16,0,9.010800043741861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,128,1,float16,float16,0,9.194512049357096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,128,1,float16,fp8,0,9.106005350748697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,128,1,float16,fp8,0,8.894794464111328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,128,1,fp8,fp8,0,6.341952006022136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,128,1,fp8,fp8,0,6.63926378885905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,128,1,float16,float16,0,8.980133056640625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,128,1,float16,fp8,0,9.011733373006185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,128,1,fp8,fp8,0,6.10101318359375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,128,1,float16,float16,0,9.116938908894857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,128,1,float16,fp8,0,9.013952255249023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,128,1,fp8,fp8,0,6.149349212646484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,128,1,float16,float16,0,4.397109349568685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,128,1,float16,fp8,0,4.44645881652832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,128,1,float16,float16,0,4.68175474802653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,128,1,fp8,fp8,0,3.1711254119873047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,128,1,float16,float16,0,4.512175877888997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,128,1,fp8,fp8,0,3.2720108032226562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,128,1,float16,fp8,0,4.372069358825684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,128,1,float16,fp8,0,4.631957372029622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,128,1,fp8,fp8,0,3.1720800399780273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,128,1,float16,fp8,0,4.575749397277832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,128,1,fp8,fp8,0,3.185728073120117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,128,1,float16,float16,0,4.437253316243489
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,128,1,float16,float16,0,4.464805285135905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,128,1,float16,fp8,0,4.347375869750977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,128,1,fp8,fp8,0,3.1824267705281577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,128,1,float16,float16,0,2.342309315999349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,128,1,float16,fp8,0,2.3071680068969727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,128,1,fp8,fp8,0,1.7317280769348145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,128,1,float16,fp8,0,2.33296537399292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,128,1,float16,float16,0,2.3771519660949707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,128,1,fp8,fp8,0,1.775818665822347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,128,1,float16,float16,0,2.346127986907959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,128,1,float16,fp8,0,2.311247984568278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,128,1,fp8,fp8,0,1.7358773549397786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,128,1,float16,float16,0,2.342986742655436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,128,1,float16,fp8,0,2.327733357747396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,128,1,fp8,fp8,0,1.7352693875630696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,128,1,fp8,fp8,0,1.7371892929077148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,128,1,float16,fp8,0,2.3454453150431314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,128,1,float16,float16,0,2.3732852935791016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,128,1,float16,float16,0,21.193988800048828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,128,1,float16,fp8,0,20.991967519124348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,128,1,fp8,fp8,0,14.891440073649088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,128,1,fp8,fp8,0,14.852672576904297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,128,1,float16,float16,0,21.36681620279948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,128,1,float16,float16,0,21.541722615559895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,128,1,float16,fp8,0,21.208607991536457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,128,1,float16,fp8,0,21.49469757080078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,128,1,float16,float16,0,10.680122375488281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,128,1,fp8,fp8,0,14.813466389973959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,128,1,float16,float16,0,21.461713155110676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,128,1,fp8,fp8,0,14.9508056640625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,128,1,float16,fp8,0,10.534645080566406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,128,1,float16,fp8,0,20.921498616536457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,128,1,fp8,fp8,0,7.43116823832194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,128,1,float16,float16,0,10.855541229248047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,128,1,float16,fp8,0,11.003013610839844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,128,1,fp8,fp8,0,7.92140261332194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,128,1,float16,float16,0,10.716932932535807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,128,1,float16,fp8,0,10.44491195678711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,128,1,fp8,fp8,0,7.488074620564778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,128,1,float16,float16,0,10.6517333984375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,128,1,float16,fp8,0,10.45802116394043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,128,1,fp8,fp8,0,7.375066757202148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,128,1,float16,float16,0,11.000272115071615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,128,1,float16,fp8,0,10.652629216512045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,128,1,fp8,fp8,0,7.51472536722819
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,128,1,float16,float16,0,5.045328140258789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,128,1,float16,fp8,0,5.244981447855632
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,128,1,fp8,fp8,0,3.7341438929239907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,128,1,float16,float16,0,5.334490458170573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,128,1,float16,fp8,0,5.4813385009765625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,128,1,float16,fp8,0,5.2538401285807295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,128,1,float16,float16,0,5.352282842000325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,128,1,fp8,fp8,0,3.7298666636149087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,128,1,fp8,fp8,0,3.923877398173014
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,128,1,float16,float16,0,5.185482660929362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,128,1,float16,fp8,0,5.3527037302653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,128,1,fp8,fp8,0,3.743605295817057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,128,1,float16,fp8,0,5.234085400899251
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,128,1,float16,float16,0,5.277658780415853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,128,1,float16,float16,0,2.6764745712280273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,128,1,fp8,fp8,0,3.760437329610189
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,128,1,float16,float16,0,2.6595306396484375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,128,1,float16,fp8,0,2.603797276814779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,128,1,float16,fp8,0,2.6548800468444824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,128,1,fp8,fp8,0,2.061509291330973
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,128,1,fp8,fp8,0,1.9752426147460938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,128,1,float16,fp8,0,2.639296054840088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,128,1,fp8,fp8,0,1.9790026346842449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,128,1,float16,float16,0,2.6882667541503906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,128,1,float16,float16,0,2.641962687174479
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,128,1,float16,fp8,0,2.6276747385660806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,128,1,fp8,fp8,0,1.9756959279378254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,128,1,float16,float16,0,2.667663892110189
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,128,1,float16,float16,0,1.440901279449463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,128,1,float16,fp8,0,2.6164587338765464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,128,1,fp8,fp8,0,1.988378683725993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,128,1,float16,float16,0,1.4481120109558105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,128,1,float16,fp8,0,1.4485173225402832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,128,1,float16,fp8,0,1.4150773684183757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,128,1,fp8,fp8,0,1.1334826946258545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,128,1,fp8,fp8,0,1.0998773574829102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,128,1,float16,float16,0,1.4428480466206868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,128,1,float16,fp8,0,1.4186879793802898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,128,1,fp8,fp8,0,1.1015253067016602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,128,1,float16,float16,0,1.4447520573933919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,128,1,fp8,fp8,0,1.1005866527557373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,128,1,float16,fp8,0,1.4311359723409016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,128,1,float16,float16,0,1.4427679379781086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,128,1,float16,fp8,0,1.4296053250630696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,128,1,fp8,fp8,0,1.106554667154948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,128,1,float16,float16,0,21.58008066813151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,128,1,float16,fp8,0,22.083290100097656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,128,1,fp8,fp8,0,15.043824513753256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,128,1,fp8,fp8,0,15.01796849568685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,128,1,float16,float16,0,21.685930887858074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,128,1,float16,fp8,0,21.607838948567707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,128,1,float16,fp8,0,21.448089599609375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,128,1,float16,float16,0,21.982630411783855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,128,1,float16,float16,0,10.176997502644857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,128,1,fp8,fp8,0,15.021418253580729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,128,1,float16,float16,0,22.77246348063151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,128,1,float16,fp8,0,10.125392278035482
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,128,1,float16,fp8,0,21.73967997233073
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,128,1,fp8,fp8,0,15.1093381245931
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,128,1,fp8,fp8,0,8.004688262939453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,128,1,float16,fp8,0,10.743264516194662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,128,1,fp8,fp8,0,7.488351821899414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,128,1,float16,float16,0,10.558965047200521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,128,1,float16,float16,0,10.473125457763672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,128,1,float16,fp8,0,10.239941279093424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,128,1,fp8,fp8,0,7.424234390258789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,128,1,float16,float16,0,10.285765329996744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,128,1,float16,fp8,0,10.197349548339844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,128,1,fp8,fp8,0,7.555472056070964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,128,1,float16,float16,0,10.325712203979492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,128,1,float16,fp8,0,10.258853276570639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,128,1,fp8,fp8,0,7.630682627360026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,128,1,float16,float16,0,4.956570625305176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,128,1,float16,fp8,0,4.865317344665527
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,128,1,fp8,fp8,0,3.7552480697631836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,128,1,float16,fp8,0,4.980586687723796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,128,1,float16,float16,0,5.2212371826171875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,128,1,float16,float16,0,4.963786760965983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,128,1,fp8,fp8,0,4.012309392293294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,128,1,float16,fp8,0,5.15997854868571
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,128,1,fp8,fp8,0,3.7628533045450845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,128,1,float16,float16,0,5.064138730367024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,128,1,float16,fp8,0,4.999013264973958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,128,1,fp8,fp8,0,3.7633654276529946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,128,1,float16,float16,0,5.136506716410319
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,128,1,float16,fp8,0,5.067791938781738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,128,1,float16,float16,0,2.5162293116251626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,128,1,fp8,fp8,0,3.780165354410807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,128,1,float16,fp8,0,2.4785119692484536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,128,1,fp8,fp8,0,1.9480746587117512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,128,1,float16,float16,0,2.621760050455729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,128,1,float16,fp8,0,2.572394688924154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,128,1,float16,fp8,0,2.4785547256469727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,128,1,float16,float16,0,2.5175253550211587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,128,1,fp8,fp8,0,2.0937439600626626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,128,1,fp8,fp8,0,1.9482347170511882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,128,1,float16,float16,0,2.5178240140279136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,128,1,float16,fp8,0,2.494778633117676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,128,1,float16,float16,0,2.535360018412272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,128,1,fp8,fp8,0,1.9595573743184407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,128,1,float16,fp8,0,2.495920022328695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,128,1,fp8,fp8,0,1.9633173942565918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,128,1,float16,float16,0,1.344490687052409
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,128,1,float16,fp8,0,1.3131039937337239
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,128,1,float16,float16,0,1.3791626294453938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,128,1,float16,fp8,0,1.3567147254943848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,128,1,float16,float16,0,1.3432799975077312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,128,1,fp8,fp8,0,1.1127893129984539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,128,1,fp8,fp8,0,1.0514933268229167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,128,1,float16,fp8,0,1.3205546538035076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,128,1,fp8,fp8,0,1.05294934908549
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,128,1,float16,float16,0,1.3452587127685547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,128,1,float16,fp8,0,1.3256426652272542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,128,1,fp8,fp8,0,1.0531520048777263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,128,1,float16,float16,0,1.3527040481567383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,128,1,float16,fp8,0,1.3211572964986165
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,128,1,float16,float16,0,0.7581653594970703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,128,1,float16,fp8,0,0.7550240357716879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,128,1,fp8,fp8,0,1.0596746603647869
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,128,1,float16,float16,0,0.7550079822540283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,128,1,float16,fp8,0,0.7369759877522787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,128,1,fp8,fp8,0,0.6035199960072836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,128,1,float16,float16,0,0.753210703531901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,128,1,fp8,fp8,0,0.6354133288065592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,128,1,float16,fp8,0,0.7386933167775472
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,128,1,fp8,fp8,0,0.6040319999059042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,128,1,float16,float16,0,0.7586399714152018
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,128,1,fp8,fp8,0,0.6052320003509521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,128,1,float16,fp8,0,0.7427999973297119
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,128,1,float16,fp8,0,0.7397332986195883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,128,1,float16,float16,0,0.7580426534016927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,128,1,fp8,fp8,0,0.6067733367284139
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,128,1,float16,fp8,0,12.295247395833334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,128,1,float16,float16,0,12.529088338216146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,128,1,fp8,fp8,0,9.536127726236979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,128,1,fp8,fp8,0,9.407424290974935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,128,1,float16,fp8,0,12.500170389811197
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,128,1,float16,float16,0,12.565755208333334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,128,1,float16,float16,0,12.641456604003906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,128,1,float16,fp8,0,12.474170684814453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,128,1,float16,float16,0,6.183663686116536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,128,1,fp8,fp8,0,9.443679809570312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,128,1,float16,fp8,0,6.056496302286784
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,128,1,float16,float16,0,12.721477508544922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,128,1,fp8,fp8,0,9.49452273050944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,128,1,float16,fp8,0,12.645253499348959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,128,1,fp8,fp8,0,5.107301394144694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,128,1,float16,float16,0,6.611589431762695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,128,1,fp8,fp8,0,4.716778755187988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,128,1,float16,fp8,0,6.397317250569661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,128,1,float16,float16,0,6.07536506652832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,128,1,fp8,fp8,0,4.717701276143392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,128,1,float16,fp8,0,6.155738830566406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,128,1,float16,float16,0,6.190080006917317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,128,1,float16,fp8,0,6.139968236287435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,128,1,fp8,fp8,0,4.742261250813802
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,128,1,float16,float16,0,6.249792098999023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,128,1,float16,fp8,0,5.887162526448567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,128,1,float16,float16,0,3.028239885965983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,128,1,fp8,fp8,0,4.765568097432454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,128,1,float16,fp8,0,2.9866345723470054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,128,1,fp8,fp8,0,2.400533358256022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,128,1,float16,float16,0,3.0192693074544272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,128,1,float16,float16,0,3.14247989654541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,128,1,float16,fp8,0,3.127253214518229
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,128,1,float16,fp8,0,2.96451727549235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,128,1,fp8,fp8,0,2.4054932594299316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,128,1,float16,float16,0,3.054527918497721
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,128,1,fp8,fp8,0,2.617034594217936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,128,1,float16,fp8,0,2.983562787373861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,128,1,fp8,fp8,0,2.4128692944844565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,128,1,float16,float16,0,3.0333334604899087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,128,1,float16,fp8,0,2.9889707565307617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,128,1,float16,float16,0,1.575610637664795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,128,1,fp8,fp8,0,2.4327732721964517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,128,1,float16,fp8,0,1.5405492782592773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,128,1,fp8,fp8,0,1.2616746425628662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,128,1,float16,float16,0,1.6269440650939941
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,128,1,float16,float16,0,1.5734079678853352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,128,1,float16,fp8,0,1.6305066744486492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,128,1,float16,fp8,0,1.546170711517334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,128,1,fp8,fp8,0,1.2657919724782307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,128,1,fp8,fp8,0,1.3782827059427898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,128,1,float16,fp8,0,1.5431359608968098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,128,1,float16,float16,0,1.5731147130330403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,128,1,fp8,fp8,0,1.2683146794637044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,128,1,float16,float16,0,1.5810240109761555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,128,1,fp8,fp8,0,1.2775786717732747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,128,1,float16,fp8,0,1.5604160626729329
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,128,1,float16,float16,0,0.8491946856180826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,128,1,float16,fp8,0,0.8331733544667562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,128,1,float16,float16,0,0.8705653349558512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,128,1,fp8,fp8,0,0.738858699798584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,128,1,float16,fp8,0,0.8700853188832601
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,128,1,float16,float16,0,0.8525919914245605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,128,1,fp8,fp8,0,0.6904106934865316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,128,1,float16,fp8,0,0.8348000049591064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,128,1,fp8,fp8,0,0.6908853054046631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,128,1,float16,float16,0,0.8521813551584879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,128,1,float16,fp8,0,0.8392852942148844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,128,1,fp8,fp8,0,0.6940159797668457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,128,1,float16,fp8,0,0.8373760382334391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,128,1,fp8,fp8,0,0.6973386605580648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,128,1,float16,float16,0,0.8584799766540527
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,128,1,float16,float16,0,0.49032000700632733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,128,1,float16,float16,0,0.49993598461151123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,128,1,float16,fp8,0,0.48898132642110187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,128,1,fp8,fp8,0,0.41722134749094647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,128,1,float16,fp8,0,0.47678931554158527
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,128,1,float16,float16,0,0.49127999941507977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,128,1,fp8,fp8,0,0.38923199971516925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,128,1,float16,fp8,0,0.48199466864267987
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,128,1,fp8,fp8,0,0.39025068283081055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,128,1,float16,fp8,0,0.47977598508199054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,128,1,float16,float16,0,0.4928426742553711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,128,1,fp8,fp8,0,0.3905973434448242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,128,1,float16,float16,0,0.49211732546488446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,128,1,float16,fp8,0,0.4808266560236613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,128,1,fp8,fp8,0,0.3924959897994995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,128,1,float16,fp8,0,13.67624537150065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,128,1,fp8,fp8,0,10.315077463785807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,128,1,fp8,fp8,0,10.312826792399088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,128,1,float16,fp8,0,13.742485046386719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,128,1,float16,float16,0,14.152528127034506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,128,1,float16,float16,0,14.15462875366211
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,128,1,float16,float16,0,14.05832036336263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,128,1,float16,fp8,0,13.829893747965494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,128,1,float16,float16,0,6.249589284261067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,128,1,fp8,fp8,0,10.35595703125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,128,1,float16,fp8,0,13.891376495361328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,128,1,float16,float16,0,14.26632563273112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,128,1,float16,fp8,0,6.1788584391276045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,128,1,fp8,fp8,0,10.433797200520834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,128,1,fp8,fp8,0,5.661680221557617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,128,1,float16,float16,0,6.773285547892253
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,128,1,fp8,fp8,0,5.151909192403157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,128,1,float16,fp8,0,6.885072072347005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,128,1,float16,float16,0,6.276485443115234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,128,1,float16,fp8,0,6.185178756713867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,128,1,fp8,fp8,0,5.162207921346028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,128,1,float16,float16,0,6.237754821777344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,128,1,float16,fp8,0,6.192911783854167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,128,1,fp8,fp8,0,5.187573432922363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,128,1,float16,float16,0,6.265920003255208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,128,1,fp8,fp8,0,5.212261199951172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,128,1,float16,fp8,0,6.301877339680989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,128,1,float16,float16,0,3.0916748046875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,128,1,float16,fp8,0,3.053936004638672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,128,1,fp8,fp8,0,2.585792064666748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,128,1,float16,float16,0,3.0894505182902017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,128,1,float16,fp8,0,3.0387465159098306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,128,1,float16,fp8,0,3.2779785792032876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,128,1,fp8,fp8,0,2.8703571955362954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,128,1,fp8,fp8,0,2.587600072224935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,128,1,float16,float16,0,3.278437296549479
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,128,1,float16,float16,0,3.1181440353393555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,128,1,float16,fp8,0,3.0352052052815757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,128,1,fp8,fp8,0,2.595893383026123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,128,1,float16,float16,0,1.593392054239909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,128,1,float16,float16,0,3.1318718592325845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,128,1,float16,fp8,0,3.0559733708699546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,128,1,fp8,fp8,0,2.6215200424194336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,128,1,float16,fp8,0,1.5506827036539714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,128,1,float16,float16,0,1.6815093358357747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,128,1,fp8,fp8,0,1.3382666905721028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,128,1,float16,fp8,0,1.6594079335530598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,128,1,float16,float16,0,1.600671927134196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,128,1,fp8,fp8,0,1.3404426574707031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,128,1,fp8,fp8,0,1.4843200047810872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,128,1,float16,fp8,0,1.5578932762145996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,128,1,float16,float16,0,1.5985706647237141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,128,1,float16,fp8,0,1.558677355448405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,128,1,fp8,fp8,0,1.347973346710205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,128,1,float16,fp8,0,1.5730986595153809
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,128,1,fp8,fp8,0,1.3535680770874023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,128,1,float16,float16,0,0.8399093151092529
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,128,1,float16,float16,0,0.8847893079121908
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,128,1,float16,fp8,0,0.8749439716339111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,128,1,fp8,fp8,0,0.7151573499043783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,128,1,float16,float16,0,1.6026132901509602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,128,1,fp8,fp8,0,0.7901706695556641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,128,1,float16,fp8,0,0.8208906650543213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,128,1,float16,float16,0,0.8437600135803223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,128,1,float16,fp8,0,0.8215253353118896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,128,1,fp8,fp8,0,0.7152907053629557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,128,1,float16,float16,0,0.845199982325236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,128,1,float16,fp8,0,0.8254613081614176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,128,1,float16,float16,0,0.846501350402832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,128,1,fp8,fp8,0,0.7157973448435465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,128,1,float16,fp8,0,0.8288373152414957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,128,1,fp8,fp8,0,0.7216906547546387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,128,1,float16,float16,0,0.482746680577596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,128,1,float16,float16,0,0.4673600196838379
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,128,1,float16,fp8,0,0.45305601755777997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,128,1,float16,fp8,0,0.4787413279215495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,128,1,float16,float16,0,0.4679679870605469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,128,1,fp8,fp8,0,0.39769065380096436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,128,1,float16,fp8,0,0.4551253318786621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,128,1,fp8,fp8,0,0.3993866840998332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,128,1,float16,float16,0,0.4675893386205037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,128,1,fp8,fp8,0,0.4333173433939616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,128,1,float16,fp8,0,0.455077330271403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,128,1,fp8,fp8,0,0.40165332953135174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,128,1,float16,float16,0,0.47093868255615234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,128,1,fp8,fp8,0,0.4040373166402181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,128,1,float16,fp8,0,0.4580959876378377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,128,1,float16,float16,0,0.2784266670544942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,128,1,float16,float16,0,0.28571732838948566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,128,1,float16,fp8,0,0.27108800411224365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,128,1,float16,fp8,0,0.2816480000813802
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,128,1,fp8,fp8,0,0.23052799701690674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,128,1,float16,float16,0,0.27875200907389325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,128,1,float16,fp8,0,0.27006399631500244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,128,1,fp8,fp8,0,0.24966400861740112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,128,1,fp8,fp8,0,0.22898133595784506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,128,1,float16,float16,0,0.28012265761693317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,128,1,float16,fp8,0,0.2729439934094747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,128,1,fp8,fp8,0,0.2309066653251648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,128,1,float16,float16,0,0.2816480000813802
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,128,1,float16,fp8,0,0.273525337378184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,128,1,fp8,fp8,0,0.23083200057347616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,128,1,float16,float16,0,8.199349085489908
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,128,1,float16,fp8,0,7.889642715454102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,128,1,fp8,fp8,0,6.890207926432292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,128,1,float16,float16,0,8.172880172729492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,128,1,float16,fp8,0,8.051962534586588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,128,1,fp8,fp8,0,6.918885548909505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,128,1,float16,float16,0,8.263888041178385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,128,1,float16,fp8,0,7.77785046895345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,128,1,fp8,fp8,0,6.942666371663411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,128,1,float16,float16,0,3.960597356160482
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,128,1,float16,float16,0,8.220287958780924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,128,1,fp8,fp8,0,6.997999827067058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,128,1,float16,fp8,0,8.01531728108724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,128,1,float16,fp8,0,4.243541399637858
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,128,1,fp8,fp8,0,3.8502025604248047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,128,1,float16,float16,0,4.355578740437825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,128,1,float16,fp8,0,3.8795413970947266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,128,1,float16,float16,0,3.9892053604125977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,128,1,float16,fp8,0,3.939056078592936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,128,1,fp8,fp8,0,3.4480374654134116
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,128,1,fp8,fp8,0,3.432821273803711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,128,1,float16,float16,0,3.982682545979818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,128,1,float16,fp8,0,3.8742825190226235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,128,1,fp8,fp8,0,3.465717315673828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,128,1,float16,fp8,0,3.8959786097208657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,128,1,float16,float16,0,4.005925178527832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,128,1,fp8,fp8,0,3.498533248901367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,128,1,float16,float16,0,2.0025760332743325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,128,1,float16,float16,0,2.1491467157999673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,128,1,float16,fp8,0,1.9561066627502441
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,128,1,float16,fp8,0,2.1156533559163413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,128,1,fp8,fp8,0,1.7340426445007324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,128,1,float16,fp8,0,1.9546133677164714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,128,1,float16,float16,0,2.0057387351989746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,128,1,fp8,fp8,0,1.7404853502909343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,128,1,fp8,fp8,0,1.9581173261006672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,128,1,float16,float16,0,2.008080005645752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,128,1,float16,fp8,0,1.9598506291707356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,128,1,fp8,fp8,0,1.7446346282958984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,128,1,float16,float16,0,2.0172425905863443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,128,1,float16,fp8,0,1.9746400515238445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,128,1,float16,float16,0,1.0392426649729412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,128,1,fp8,fp8,0,1.7636906305948894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,128,1,float16,fp8,0,1.0089226563771565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,128,1,float16,float16,0,1.1054773330688477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,128,1,float16,fp8,0,1.090341329574585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,128,1,fp8,fp8,0,0.9025013446807861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,128,1,fp8,fp8,0,1.017407973607381
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,128,1,float16,float16,0,1.0464693705240886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,128,1,float16,fp8,0,1.0180266698201497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,128,1,fp8,fp8,0,0.9082240263621012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,128,1,float16,fp8,0,1.0157492955525715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,128,1,float16,float16,0,1.0422613620758057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,128,1,fp8,fp8,0,0.9125493367513021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,128,1,float16,float16,0,1.0474879741668701
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,128,1,float16,fp8,0,1.0200533072153728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,128,1,fp8,fp8,0,0.9220853646596273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,128,1,float16,float16,0,0.5564373334248861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,128,1,float16,fp8,0,0.5408373276392618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,128,1,float16,float16,0,0.5883200168609619
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,128,1,fp8,fp8,0,0.4870719909667969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,128,1,float16,float16,0,0.5571146806081136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,128,1,float16,fp8,0,0.5814346472422282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,128,1,float16,fp8,0,0.5435520013173422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,128,1,fp8,fp8,0,0.5478293498357137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,128,1,fp8,fp8,0,0.48934932549794513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,128,1,float16,float16,0,0.5574186642964681
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,128,1,float16,fp8,0,0.5445706844329834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,128,1,fp8,fp8,0,0.4909013509750366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,128,1,float16,float16,0,0.560965339342753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,128,1,float16,fp8,0,0.5468426545461019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,128,1,fp8,fp8,0,0.4965706666310628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,128,1,float16,float16,0,0.3139999906222026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,128,1,float16,fp8,0,0.30373867352803546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,128,1,fp8,fp8,0,0.2696373263994853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,128,1,float16,fp8,0,0.32359999418258667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,128,1,float16,float16,0,0.32822932799657184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,128,1,fp8,fp8,0,0.3025173346201579
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,128,1,float16,fp8,0,0.30617066224416095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,128,1,float16,float16,0,0.31676799058914185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,128,1,fp8,fp8,0,0.2697333296140035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,128,1,float16,float16,0,0.3157599965731303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,128,1,float16,fp8,0,0.30669333537419635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,128,1,fp8,fp8,0,0.27030932903289795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,128,1,float16,float16,0,0.3178773323694865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,128,1,float16,fp8,0,0.3091040054957072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,128,1,fp8,fp8,0,0.27290133635203045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,128,1,float16,float16,0,0.17965332667032877
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,128,1,float16,fp8,0,0.17385600010553995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,128,1,float16,float16,0,0.19315199057261148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,128,1,fp8,fp8,0,0.17679466803868613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,128,1,fp8,fp8,0,0.16375999649365744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,128,1,float16,fp8,0,0.18930133183797201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,128,1,float16,float16,0,0.17997332413991293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,128,1,float16,fp8,0,0.17523199319839478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,128,1,fp8,fp8,0,0.16403200229008993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,128,1,float16,float16,0,0.18024533987045288
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,128,1,fp8,fp8,0,0.16433599591255188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,128,1,float16,fp8,0,0.17547200123469034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,128,1,float16,float16,0,0.1809920072555542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,128,1,float16,fp8,0,0.17518933614095053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,128,1,fp8,fp8,0,0.16581867138544717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,128,1,float16,float16,0,9.121418635050455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,128,1,float16,fp8,0,8.992250442504883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,128,1,fp8,fp8,0,6.823157628377278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,128,1,float16,float16,0,8.812447865804037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,128,1,float16,fp8,0,9.00007438659668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,128,1,float16,fp8,0,8.940874735514322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,128,1,fp8,fp8,0,6.84445317586263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,128,1,float16,float16,0,8.917402903238932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,128,1,float16,float16,0,3.764714558919271
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,128,1,fp8,fp8,0,6.949002583821614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,128,1,float16,fp8,0,9.11026128133138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,128,1,float16,float16,0,9.222117106119791
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,128,1,fp8,fp8,0,7.002447764078776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,128,1,float16,float16,0,4.553269386291504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,128,1,fp8,fp8,0,3.4010559717814126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,128,1,float16,fp8,0,3.755077362060547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,128,1,fp8,fp8,0,3.932069460550944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,128,1,float16,fp8,0,4.505359967549642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,128,1,float16,float16,0,3.8402398427327475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,128,1,float16,fp8,0,3.8137973149617515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,128,1,fp8,fp8,0,3.3950134913126626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,128,1,float16,float16,0,3.8700265884399414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,128,1,float16,fp8,0,3.9102347691853843
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,128,1,fp8,fp8,0,3.4581387837727866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,128,1,float16,float16,0,3.9557441075642905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,128,1,float16,fp8,0,3.9754505157470703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,128,1,fp8,fp8,0,3.4955199559529624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,128,1,float16,float16,0,1.8891092936197917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,128,1,float16,fp8,0,1.8899787267049153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,128,1,float16,float16,0,2.26310396194458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,128,1,float16,fp8,0,2.190117359161377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,128,1,fp8,fp8,0,1.695258617401123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,128,1,float16,float16,0,1.8930080731709797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,128,1,float16,fp8,0,1.8971306482950847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,128,1,fp8,fp8,0,1.7024213473002117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,128,1,fp8,fp8,0,1.7255946795145671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,128,1,float16,fp8,0,1.922592004140218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,128,1,float16,float16,0,1.905471960703532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,128,1,fp8,fp8,0,1.9713066418965657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,128,1,float16,float16,0,1.94048007329305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,128,1,float16,fp8,0,1.9459519386291504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,128,1,float16,float16,0,0.9626719951629639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,128,1,fp8,fp8,0,1.7450720469156902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,128,1,float16,fp8,0,0.9632533391316732
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,128,1,fp8,fp8,0,0.8448159694671631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,128,1,float16,float16,0,1.1244266827901204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,128,1,float16,float16,0,0.9645919799804688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,128,1,float16,fp8,0,0.9636267026265463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,128,1,fp8,fp8,0,0.851482629776001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,128,1,float16,float16,0,0.9690453211466471
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,128,1,float16,fp8,0,1.1035412947336833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,128,1,fp8,fp8,0,0.9895146687825521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,128,1,float16,fp8,0,0.9699146747589111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,128,1,fp8,fp8,0,0.8741973241170248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,128,1,float16,float16,0,0.9753759702046713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,128,1,float16,fp8,0,0.9766613642374674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,128,1,float16,float16,0,0.49701865514119464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,128,1,float16,fp8,0,0.4960213502248128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,128,1,fp8,fp8,0,0.8815893332163492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,128,1,float16,fp8,0,0.5582666794459025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,128,1,float16,float16,0,0.5728746652603149
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,128,1,fp8,fp8,0,0.5044426520665487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,128,1,fp8,fp8,0,0.435535987218221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,128,1,float16,float16,0,0.4975200096766154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,128,1,float16,fp8,0,0.4979413350423177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,128,1,fp8,fp8,0,0.4376000165939331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,128,1,float16,float16,0,0.49857600529988605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,128,1,float16,fp8,0,0.49925867716471356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,128,1,fp8,fp8,0,0.43971200784047443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,128,1,float16,float16,0,0.5034933487574259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,128,1,float16,fp8,0,0.5037546555201212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,128,1,float16,float16,0,0.2614240050315857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,128,1,float16,fp8,0,0.26208533843358356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,128,1,fp8,fp8,0,0.446394681930542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,128,1,fp8,fp8,0,0.2313493291536967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,128,1,float16,float16,0,0.30156266689300537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,128,1,float16,fp8,0,0.2960053284962972
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,128,1,fp8,fp8,0,0.2643306652704875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,128,1,float16,float16,0,0.26476800441741943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,128,1,float16,fp8,0,0.26335465908050537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,128,1,fp8,fp8,0,0.2320586641629537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,128,1,float16,fp8,0,0.26578134298324585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,128,1,fp8,fp8,0,0.2320853273073832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,128,1,float16,float16,0,0.2657653292020162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,128,1,float16,float16,0,0.26746666431427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,128,1,float16,fp8,0,0.2672746578852336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,128,1,fp8,fp8,0,0.23536000649134317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,128,1,float16,float16,0,0.14385599891344705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,128,1,float16,float16,0,0.1650773286819458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,128,1,fp8,fp8,0,0.1218293309211731
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,128,1,float16,float16,0,0.14429333806037903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,128,1,fp8,fp8,0,0.139984001715978
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,128,1,float16,fp8,0,0.16148799657821655
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,128,1,float16,fp8,0,0.14436800281206766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,128,1,float16,fp8,0,0.1450826625029246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,128,1,fp8,fp8,0,0.12199466427167256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,128,1,float16,float16,0,0.14604266484578451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,128,1,float16,fp8,0,0.1460640033086141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,128,1,float16,float16,0,0.14726932843526205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,128,1,fp8,fp8,0,0.12340266505877177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,128,1,float16,fp8,0,0.14843733112017313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,128,1,fp8,fp8,0,0.12454932928085327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,128,1,float16,float16,0,0.08868267138799031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,128,1,float16,float16,0,0.07644266883532207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,128,1,float16,fp8,0,0.07694933315118153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,128,1,float16,fp8,0,0.08715732892354329
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,128,1,fp8,fp8,0,0.0792906681696574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,128,1,float16,float16,0,0.07631466786066692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,128,1,fp8,fp8,0,0.06758399804433186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,128,1,fp8,fp8,0,0.0677706648906072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,128,1,float16,float16,0,0.07690133154392242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,128,1,float16,fp8,0,0.07735466460386912
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,128,1,fp8,fp8,0,0.06889066596825917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,128,1,float16,fp8,0,0.07711466650168101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,128,1,float16,float16,0,0.07762133578459422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,128,1,float16,fp8,0,0.077824001510938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,128,1,fp8,fp8,0,0.07045866549015045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,1,128,1,float16,fp8,0,6.6171518961588545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,1,128,1,float16,float16,0,6.63864008585612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,1,128,1,fp8,fp8,0,5.657162984212239
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,2,128,1,fp8,fp8,0,5.667162577311198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,2,128,1,float16,float16,0,6.60638427734375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,2,128,1,float16,fp8,0,6.564512252807617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,4,128,1,float16,float16,0,6.6921335856119795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,4,128,1,float16,fp8,0,6.603514353434245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,1,128,1,float16,float16,0,2.95576540629069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,4,128,1,fp8,fp8,0,5.732378641764323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,8,128,1,fp8,fp8,0,5.8096052805582685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,8,128,1,float16,float16,0,6.837706883748372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,64,8,128,1,float16,fp8,0,6.676352183024089
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,1,128,1,float16,fp8,0,2.934272130330404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,64,128,1,float16,float16,0,3.6615893046061196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,64,128,1,float16,fp8,0,3.5663839975992837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,64,128,1,fp8,fp8,0,3.3506186803181968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,1,128,1,fp8,fp8,0,2.7977867126464844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,2,128,1,float16,float16,0,2.9569921493530273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,2,128,1,float16,fp8,0,2.9328959782918296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,2,128,1,fp8,fp8,0,2.8076585133870444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,4,128,1,float16,float16,0,3.0219039916992188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,4,128,1,float16,fp8,0,2.9988320668538413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,4,128,1,fp8,fp8,0,2.856543858846029
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,8,128,1,float16,float16,0,3.0606559117635093
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,8,128,1,float16,fp8,0,3.0616321563720703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,1,128,1,float16,float16,0,1.4580532709757488
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,64,8,128,1,fp8,fp8,0,2.901866594950358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,1,128,1,float16,fp8,0,1.4591466585795085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,1,128,1,fp8,fp8,0,1.396714687347412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,64,128,1,float16,float16,0,1.8632639249165852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,64,128,1,float16,fp8,0,1.7803254127502441
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,2,128,1,float16,float16,0,1.4676106770833333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,2,128,1,float16,fp8,0,1.4669067064921062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,64,128,1,fp8,fp8,0,1.6796266237894695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,2,128,1,fp8,fp8,0,1.4002666473388672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,4,128,1,float16,float16,0,1.4770240783691406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,4,128,1,float16,fp8,0,1.4805013338724773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,4,128,1,fp8,fp8,0,1.431125322977702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,8,128,1,float16,float16,0,1.4983466466267903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,8,128,1,float16,fp8,0,1.5258933703104656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,64,8,128,1,fp8,fp8,0,1.4461119969685872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,1,128,1,float16,float16,0,0.7435839970906576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,1,128,1,float16,fp8,0,0.7414666811625162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,1,128,1,fp8,fp8,0,0.6947147051493326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,64,128,1,float16,float16,0,0.9252160390218099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,64,128,1,float16,fp8,0,0.8983893394470215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,2,128,1,float16,float16,0,0.7450133164723715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,2,128,1,float16,fp8,0,0.745807965596517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,2,128,1,fp8,fp8,0,0.6985973517100016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,64,128,1,fp8,fp8,0,0.835424025853475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,4,128,1,float16,float16,0,0.7475893497467041
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,4,128,1,float16,fp8,0,0.7489173412322998
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,8,128,1,float16,float16,0,0.7538399696350098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,8,128,1,float16,fp8,0,0.7579840024312338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,4,128,1,fp8,fp8,0,0.720357338587443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,64,8,128,1,fp8,fp8,0,0.7264373302459717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,1,128,1,float16,float16,0,0.3842506806055705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,1,128,1,fp8,fp8,0,0.35765333970387775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,64,128,1,float16,float16,0,0.46515198548634845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,1,128,1,float16,fp8,0,0.38257598876953125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,2,128,1,float16,float16,0,0.3837706645329793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,64,128,1,fp8,fp8,0,0.428607980410258
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,2,128,1,float16,fp8,0,0.3848106861114502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,2,128,1,fp8,fp8,0,0.35866133371988934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,4,128,1,float16,float16,0,0.3865120013554891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,4,128,1,fp8,fp8,0,0.36179200808207196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,4,128,1,float16,fp8,0,0.3853706518809001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,64,128,1,float16,fp8,0,0.4527680079142253
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,1,128,1,float16,float16,0,0.20363199710845947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,8,128,1,float16,fp8,0,0.39157334963480633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,8,128,1,fp8,fp8,0,0.3685813347498576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,64,8,128,1,float16,float16,0,0.389413317044576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,64,128,1,float16,float16,0,0.24661332368850708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,1,128,1,float16,fp8,0,0.2046346664428711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,64,128,1,fp8,fp8,0,0.22472000122070312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,64,128,1,float16,fp8,0,0.24016533295313516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,1,128,1,fp8,fp8,0,0.19131199518839517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,2,128,1,float16,float16,0,0.20498132705688477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,2,128,1,fp8,fp8,0,0.19172267119089761
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,2,128,1,float16,fp8,0,0.20572799444198608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,4,128,1,float16,float16,0,0.2064639925956726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,4,128,1,fp8,fp8,0,0.19313067197799683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,8,128,1,float16,float16,0,0.20753065745035806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,4,128,1,float16,fp8,0,0.20681599775950113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,8,128,1,float16,fp8,0,0.20735466480255127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,64,8,128,1,fp8,fp8,0,0.1950506567955017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,1,128,1,float16,float16,0,0.11157332857449849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,1,128,1,float16,fp8,0,0.11244799693425496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,64,128,1,float16,float16,0,0.13519466916720072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,64,128,1,float16,fp8,0,0.13209066788355509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,2,128,1,float16,float16,0,0.11187199751536052
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,64,128,1,fp8,fp8,0,0.12198399504025777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,1,128,1,fp8,fp8,0,0.1058026651541392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,2,128,1,float16,fp8,0,0.11294399698575337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,2,128,1,fp8,fp8,0,0.10549333691596985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,4,128,1,float16,float16,0,0.11314133803049724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,4,128,1,float16,fp8,0,0.11373333136240642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,4,128,1,fp8,fp8,0,0.10549867153167725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,8,128,1,float16,fp8,0,0.1151093343893687
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,64,128,1,float16,float16,0,0.07666666805744171
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,8,128,1,fp8,fp8,0,0.10849066575368245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,64,8,128,1,float16,float16,0,0.11514666676521301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,1,128,1,float16,float16,0,0.06306666632493337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,1,128,1,float16,fp8,0,0.06311466793219249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,64,128,1,float16,fp8,0,0.07523733377456665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,64,128,1,fp8,fp8,0,0.07130133112271626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,1,128,1,fp8,fp8,0,0.05834133426348368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,2,128,1,float16,float16,0,0.06365333497524261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,2,128,1,fp8,fp8,0,0.05827199916044871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,4,128,1,float16,float16,0,0.0637066662311554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,2,128,1,float16,fp8,0,0.06305600206057231
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,4,128,1,float16,fp8,0,0.06358933448791504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,4,128,1,fp8,fp8,0,0.05892266829808553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,8,128,1,float16,float16,0,0.06491200129191081
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,8,128,1,float16,fp8,0,0.06453866759936015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,64,8,128,1,fp8,fp8,0,0.06072533130645752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,64,128,1,float16,float16,0,0.04422399898370107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,64,128,1,float16,fp8,0,0.04376000165939331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,1,128,1,float16,float16,0,0.038346665600935616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,1,128,1,float16,fp8,0,0.038805333276589714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,1,128,1,fp8,fp8,0,0.0359199990828832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,2,128,1,float16,float16,0,0.03857066730658213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,64,128,1,fp8,fp8,0,0.042037333051363625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,2,128,1,float16,fp8,0,0.038848000268141426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,2,128,1,fp8,fp8,0,0.03587199995915095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,4,128,1,float16,float16,0,0.039093332986036934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,8,128,1,float16,float16,0,0.03917866696914037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,4,128,1,fp8,fp8,0,0.037402667105197906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,8,128,1,float16,fp8,0,0.03944533318281174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,4,128,1,float16,fp8,0,0.038986665507157646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,64,8,128,1,fp8,fp8,0,0.037903999288876854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,1,128,1,float16,float16,0,2.5218613942464194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,1,128,1,float16,fp8,0,2.5192319552103677
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,1,128,1,fp8,fp8,0,2.418901284535726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,2,128,1,float16,float16,0,2.524282614390055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,2,128,1,float16,fp8,0,2.5013866424560547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,2,128,1,fp8,fp8,0,2.4216160774230957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,4,128,1,float16,fp8,0,2.5813652674357095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,4,128,1,float16,float16,0,2.5684000651041665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,1,128,1,float16,float16,0,1.2418399651845295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,4,128,1,fp8,fp8,0,2.463157335917155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,8,128,1,fp8,fp8,0,2.5269600550333657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,8,128,1,float16,fp8,0,2.608400026957194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,64,8,128,1,float16,float16,0,2.6226933797200522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,64,128,1,float16,float16,0,1.6333279609680176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,64,128,1,float16,fp8,0,1.5698879559834797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,1,128,1,float16,fp8,0,1.23963729540507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,1,128,1,fp8,fp8,0,1.2170506318410237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,64,128,1,fp8,fp8,0,1.508405367533366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,2,128,1,float16,float16,0,1.2455999851226807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,2,128,1,float16,fp8,0,1.2453866799672444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,2,128,1,fp8,fp8,0,1.2101173400878906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,4,128,1,float16,float16,0,1.2476747035980225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,4,128,1,float16,fp8,0,1.2735520203908284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,4,128,1,fp8,fp8,0,1.2297013600667317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,8,128,1,float16,float16,0,1.267807960510254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,8,128,1,float16,fp8,0,1.3057653109232585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,64,8,128,1,fp8,fp8,0,1.2602346738179524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,1,128,1,float16,float16,0,0.6306666533152262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,1,128,1,float16,fp8,0,0.6299253304799398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,1,128,1,fp8,fp8,0,0.59825599193573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,64,128,1,float16,float16,0,0.8132586479187012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,2,128,1,float16,float16,0,0.63482133547465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,2,128,1,float16,fp8,0,0.6350080172220866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,64,128,1,float16,fp8,0,0.783951997756958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,2,128,1,fp8,fp8,0,0.6058986584345499
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,4,128,1,float16,float16,0,0.6382613182067871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,64,128,1,fp8,fp8,0,0.7490080197652181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,4,128,1,float16,fp8,0,0.637392004330953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,4,128,1,fp8,fp8,0,0.6239200035730997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,8,128,1,float16,fp8,0,0.6462346712748209
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,8,128,1,float16,float16,0,0.6434026559193929
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,1,128,1,float16,float16,0,0.32635732491811115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,64,8,128,1,fp8,fp8,0,0.6374080181121826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,64,128,1,float16,fp8,0,0.3969759941101074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,64,128,1,float16,float16,0,0.41078933080037433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,64,128,1,fp8,fp8,0,0.3850666681925456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,1,128,1,float16,fp8,0,0.32622400919596356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,1,128,1,fp8,fp8,0,0.31171733140945435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,2,128,1,float16,fp8,0,0.32705066601435345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,2,128,1,fp8,fp8,0,0.3118613362312317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,2,128,1,float16,float16,0,0.32792532444000244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,4,128,1,float16,float16,0,0.32841600974400836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,4,128,1,float16,fp8,0,0.3279573321342468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,4,128,1,fp8,fp8,0,0.3160373369852702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,8,128,1,float16,float16,0,0.33134400844573975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,8,128,1,float16,fp8,0,0.33352001508076984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,64,8,128,1,fp8,fp8,0,0.32208534081776935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,1,128,1,float16,float16,0,0.17362133661905924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,64,128,1,float16,float16,0,0.2162719964981079
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,64,128,1,float16,fp8,0,0.21037866671880087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,1,128,1,fp8,fp8,0,0.16750933726628622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,1,128,1,float16,fp8,0,0.17404800653457642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,2,128,1,float16,float16,0,0.17416532834370932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,64,128,1,fp8,fp8,0,0.20363734165827432
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,2,128,1,float16,fp8,0,0.17464532454808554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,4,128,1,float16,float16,0,0.1749173402786255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,2,128,1,fp8,fp8,0,0.16817599534988403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,4,128,1,float16,fp8,0,0.17523733774820963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,4,128,1,fp8,fp8,0,0.16881599028905234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,8,128,1,float16,fp8,0,0.17722133795420328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,8,128,1,float16,float16,0,0.1770240068435669
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,64,8,128,1,fp8,fp8,0,0.17392534017562866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,64,128,1,float16,float16,0,0.11986133456230164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,64,128,1,float16,fp8,0,0.11661332845687866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,1,128,1,float16,float16,0,0.09676266709963481
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,64,128,1,fp8,fp8,0,0.11528533697128296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,1,128,1,float16,fp8,0,0.09635200103123982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,1,128,1,fp8,fp8,0,0.09435733159383138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,2,128,1,float16,float16,0,0.09630933403968811
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,2,128,1,float16,fp8,0,0.09717333316802979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,4,128,1,float16,float16,0,0.09734400113423665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,2,128,1,fp8,fp8,0,0.09481066465377808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,4,128,1,fp8,fp8,0,0.09589866797129314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,4,128,1,float16,fp8,0,0.09737599889437358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,8,128,1,float16,float16,0,0.09845333298047383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,8,128,1,float16,fp8,0,0.09887466828028361
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,64,8,128,1,fp8,fp8,0,0.0969546635945638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,64,128,1,float16,float16,0,0.07353599866231282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,64,128,1,float16,fp8,0,0.07134933272997539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,64,128,1,fp8,fp8,0,0.0658133327960968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,1,128,1,float16,float16,0,0.056885331869125366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,1,128,1,float16,fp8,0,0.05696000158786774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,1,128,1,fp8,fp8,0,0.052383999029795326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,2,128,1,float16,fp8,0,0.05705066521962484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,2,128,1,float16,float16,0,0.05705066521962484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,2,128,1,fp8,fp8,0,0.05197333296140035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,4,128,1,float16,float16,0,0.05740800003210703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,4,128,1,float16,fp8,0,0.05712533493836721
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,4,128,1,fp8,fp8,0,0.053354665637016296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,8,128,1,float16,float16,0,0.05760000149408976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,8,128,1,float16,fp8,0,0.05775466561317444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,64,8,128,1,fp8,fp8,0,0.05598400036493937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,64,128,1,float16,float16,0,0.03938666731119156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,64,128,1,float16,fp8,0,0.03967999915281931
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,1,128,1,float16,float16,0,0.035418666899204254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,64,128,1,fp8,fp8,0,0.03926933308442434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,1,128,1,float16,fp8,0,0.035599999129772186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,1,128,1,fp8,fp8,0,0.03305066625277201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,2,128,1,float16,float16,0,0.035599999129772186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,2,128,1,float16,fp8,0,0.03527999917666117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,2,128,1,fp8,fp8,0,0.03331733246644338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,4,128,1,float16,float16,0,0.03595733394225439
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,4,128,1,fp8,fp8,0,0.034917332231998444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,4,128,1,float16,fp8,0,0.03622400015592575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,8,128,1,float16,float16,0,0.03640533238649368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,8,128,1,float16,fp8,0,0.036559998989105225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,64,8,128,1,fp8,fp8,0,0.035061334570248924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,64,128,1,float16,float16,0,0.026522666215896606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,64,128,1,float16,fp8,0,0.02701333413521449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,64,128,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,1,128,1,float16,float16,0,0.024677333732446034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,1,128,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,1,128,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,2,128,1,float16,float16,0,0.02517866591612498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,2,128,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,2,128,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,4,128,1,float16,float16,0,0.02548266698916753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,4,128,1,float16,fp8,0,0.025775998830795288
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,4,128,1,fp8,fp8,0,0.02383466561635335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,8,128,1,float16,float16,0,0.025397333006064098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,8,128,1,float16,fp8,0,0.025834667185942333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,64,8,128,1,fp8,fp8,0,0.02402666707833608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,1,128,1,float16,fp8,0,1.127285321553548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,1,128,1,float16,float16,0,1.1264533201853435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,1,128,1,fp8,fp8,0,1.2105546792348225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,2,128,1,float16,float16,0,1.134874661763509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,2,128,1,float16,fp8,0,1.1354719797770183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,2,128,1,fp8,fp8,0,1.2051040331522624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,4,128,1,float16,float16,0,1.1425493558247883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,4,128,1,float16,fp8,0,1.156821330388387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,1,128,1,float16,float16,0,0.5746613343556722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,4,128,1,fp8,fp8,0,1.2373440265655518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,8,128,1,float16,float16,0,1.1654613018035889
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,8,128,1,float16,fp8,0,1.2009173234303792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,64,8,128,1,fp8,fp8,0,1.2539733250935872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,1,128,1,float16,fp8,0,0.5746080080668131
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,64,128,1,float16,fp8,0,0.7362240155537924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,64,128,1,float16,float16,0,0.7561279932657877
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,64,128,1,fp8,fp8,0,0.7547307014465332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,1,128,1,fp8,fp8,0,0.5983200073242188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,2,128,1,float16,float16,0,0.5760800043741862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,2,128,1,float16,fp8,0,0.5775466759999593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,2,128,1,fp8,fp8,0,0.6018720070521036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,4,128,1,float16,float16,0,0.5791946649551392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,4,128,1,float16,fp8,0,0.5809706846872965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,4,128,1,fp8,fp8,0,0.6206719875335693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,8,128,1,float16,float16,0,0.5873279968897501
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,8,128,1,float16,fp8,0,0.5876213312149048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,1,128,1,float16,float16,0,0.29888532559076947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,64,8,128,1,fp8,fp8,0,0.6359946727752686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,1,128,1,float16,fp8,0,0.2980853319168091
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,64,128,1,float16,float16,0,0.40193601449330646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,1,128,1,fp8,fp8,0,0.30952000617980957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,64,128,1,float16,fp8,0,0.37885868549346924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,2,128,1,float16,float16,0,0.2993493278821309
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,64,128,1,fp8,fp8,0,0.3843199809392293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,2,128,1,float16,fp8,0,0.3002026677131653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,2,128,1,fp8,fp8,0,0.31134400765101117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,4,128,1,float16,float16,0,0.301146666208903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,4,128,1,fp8,fp8,0,0.31519466638565063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,4,128,1,float16,fp8,0,0.30108267068862915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,8,128,1,float16,float16,0,0.3052533268928528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,8,128,1,float16,fp8,0,0.30532266696294147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,64,8,128,1,fp8,fp8,0,0.3219520052274068
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,1,128,1,float16,float16,0,0.1602826714515686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,1,128,1,float16,fp8,0,0.1607146660486857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,1,128,1,fp8,fp8,0,0.16581867138544717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,64,128,1,float16,float16,0,0.21964800357818604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,64,128,1,float16,fp8,0,0.2062186598777771
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,2,128,1,float16,float16,0,0.1600213348865509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,64,128,1,fp8,fp8,0,0.20167466004689535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,2,128,1,float16,fp8,0,0.16125866770744324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,2,128,1,fp8,fp8,0,0.16731733083724976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,4,128,1,float16,float16,0,0.1627946694691976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,4,128,1,float16,fp8,0,0.16210132837295532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,4,128,1,fp8,fp8,0,0.1682986617088318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,8,128,1,float16,fp8,0,0.1634933352470398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,8,128,1,fp8,fp8,0,0.17160000403722128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,64,8,128,1,float16,float16,0,0.16453333695729574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,1,128,1,float16,float16,0,0.0904960036277771
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,1,128,1,float16,fp8,0,0.0916426678498586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,1,128,1,fp8,fp8,0,0.09425066908200581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,64,128,1,float16,float16,0,0.1204746663570404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,64,128,1,float16,fp8,0,0.11682666341463725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,64,128,1,fp8,fp8,0,0.11338667074839275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,2,128,1,float16,float16,0,0.09032533566157024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,2,128,1,float16,fp8,0,0.09131200114885966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,2,128,1,fp8,fp8,0,0.0937013328075409
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,4,128,1,float16,float16,0,0.09142933289210002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,4,128,1,float16,fp8,0,0.09128533800443013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,4,128,1,fp8,fp8,0,0.0946720043818156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,8,128,1,float16,float16,0,0.09239466985066731
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,8,128,1,fp8,fp8,0,0.09690666198730469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,64,8,128,1,float16,fp8,0,0.0925386647383372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,1,128,1,float16,float16,0,0.05301333467165629
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,64,128,1,float16,float16,0,0.07080533107121785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,64,128,1,float16,fp8,0,0.06841599941253662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,64,128,1,fp8,fp8,0,0.06638399759928386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,1,128,1,float16,fp8,0,0.05225066840648651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,1,128,1,fp8,fp8,0,0.05279466509819031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,2,128,1,float16,float16,0,0.05256533126036326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,2,128,1,float16,fp8,0,0.053861334919929504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,2,128,1,fp8,fp8,0,0.0529013325770696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,4,128,1,float16,float16,0,0.05452266832192739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,4,128,1,float16,fp8,0,0.054485330979029335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,4,128,1,fp8,fp8,0,0.053226664662361145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,8,128,1,float16,float16,0,0.05387733379999796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,8,128,1,float16,fp8,0,0.05483733117580414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,64,8,128,1,fp8,fp8,0,0.05495999753475189
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,64,128,1,float16,float16,0,0.0383093332250913
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,64,128,1,float16,fp8,0,0.03742400060097376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,1,128,1,float16,float16,0,0.033930666744709015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,64,128,1,fp8,fp8,0,0.038922667503356934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,1,128,1,float16,fp8,0,0.034074666599432625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,1,128,1,fp8,fp8,0,0.03268266717592875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,2,128,1,float16,float16,0,0.03392533212900162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,2,128,1,float16,fp8,0,0.03332266708215078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,2,128,1,fp8,fp8,0,0.03299200038115183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,4,128,1,float16,float16,0,0.034143999218940735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,4,128,1,float16,fp8,0,0.03480533262093862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,4,128,1,fp8,fp8,0,0.034976000587145485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,8,128,1,float16,float16,0,0.0342399999499321
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,8,128,1,float16,fp8,0,0.035045333206653595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,64,8,128,1,fp8,fp8,0,0.03489066660404205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,64,128,1,float16,float16,0,0.025754667818546295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,64,128,1,float16,fp8,0,0.025920001169045765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,64,128,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,1,128,1,float16,float16,0,0.02386133372783661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,1,128,1,float16,fp8,0,0.024442667762438457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,1,128,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,2,128,1,float16,float16,0,0.023984000086784363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,2,128,1,float16,fp8,0,0.02422400067249934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,2,128,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,4,128,1,float16,float16,0,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,4,128,1,float16,fp8,0,0.024346667031447094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,4,128,1,fp8,fp8,0,0.023957334458827972
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,8,128,1,float16,float16,0,0.02475733309984207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,8,128,1,float16,fp8,0,0.02489600082238515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,64,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,64,128,1,float16,fp8,0,0.020874666670958202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,64,8,128,1,fp8,fp8,0,0.02421333392461141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,64,128,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,1,128,1,float16,float16,0,0.01998399943113327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,1,128,1,float16,fp8,0,0.02022933339079221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,1,128,1,fp8,fp8,0,0.02038399999340375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,2,128,1,float16,float16,0,0.020554666717847187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,2,128,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,2,128,1,fp8,fp8,0,0.02021866664290428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,4,128,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,4,128,1,fp8,fp8,0,0.020773333807786305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,8,128,1,float16,float16,0,0.020693333198626835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,4,128,1,float16,float16,0,0.020960000654061634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,8,128,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,64,8,128,1,float16,fp8,0,0.02089066555102666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,1,128,1,float16,float16,0,0.7391040325164795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,1,128,1,float16,fp8,0,0.7366666793823242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,1,128,1,fp8,fp8,0,0.8013439973195394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,2,128,1,float16,fp8,0,0.7403413454691569
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,2,128,1,float16,float16,0,0.7405227025349935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,4,128,1,float16,float16,0,0.7433013121287028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,4,128,1,float16,fp8,0,0.745306650797526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,2,128,1,fp8,fp8,0,0.8064053058624268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,4,128,1,fp8,fp8,0,0.8340319792429606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,1,128,1,float16,float16,0,0.37755731741587323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,8,128,1,float16,float16,0,0.7518453598022461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,8,128,1,float16,fp8,0,0.7529173692067465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,64,8,128,1,fp8,fp8,0,0.8625280062357584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,64,128,1,float16,fp8,0,0.42392532030741376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,64,128,1,float16,float16,0,0.4381386836369832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,64,128,1,fp8,fp8,0,0.4889013369878133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,1,128,1,float16,fp8,0,0.37773334980010986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,1,128,1,fp8,fp8,0,0.41180264949798584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,2,128,1,float16,float16,0,0.37863465150197345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,2,128,1,float16,fp8,0,0.3794773419698079
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,2,128,1,fp8,fp8,0,0.415829340616862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,4,128,1,float16,float16,0,0.38062934080759686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,4,128,1,float16,fp8,0,0.3813920021057129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,4,128,1,fp8,fp8,0,0.42395198345184326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,8,128,1,float16,float16,0,0.38482666015625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,1,128,1,float16,float16,0,0.19922133286794028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,8,128,1,float16,fp8,0,0.38525867462158203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,1,128,1,float16,fp8,0,0.19954667488733926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,64,8,128,1,fp8,fp8,0,0.42901865641276044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,64,128,1,float16,float16,0,0.23118933041890463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,1,128,1,fp8,fp8,0,0.21891733010609946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,64,128,1,float16,fp8,0,0.22351467609405518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,64,128,1,fp8,fp8,0,0.2539199988047282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,2,128,1,float16,float16,0,0.19902400175730386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,2,128,1,fp8,fp8,0,0.2188160022099813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,4,128,1,float16,float16,0,0.199455996354421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,4,128,1,float16,fp8,0,0.20061333974202475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,4,128,1,fp8,fp8,0,0.220634659131368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,2,128,1,float16,fp8,0,0.19930134216944376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,8,128,1,float16,float16,0,0.20171199242273966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,8,128,1,float16,fp8,0,0.20149334271748862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,64,8,128,1,fp8,fp8,0,0.22577067216237387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,1,128,1,float16,float16,0,0.10815466443697612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,1,128,1,fp8,fp8,0,0.12074666221936543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,64,128,1,float16,float16,0,0.1264533301194509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,2,128,1,float16,float16,0,0.10829333464304607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,1,128,1,float16,fp8,0,0.10899200042088826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,64,128,1,float16,fp8,0,0.12269333004951477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,64,128,1,fp8,fp8,0,0.13847466309865317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,2,128,1,float16,fp8,0,0.10821866989135742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,2,128,1,fp8,fp8,0,0.12077866991360982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,4,128,1,float16,float16,0,0.10878933469454448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,4,128,1,float16,fp8,0,0.10909866293271382
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,4,128,1,fp8,fp8,0,0.12139200170834859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,8,128,1,float16,fp8,0,0.11056533455848694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,8,128,1,fp8,fp8,0,0.12326932946840923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,64,8,128,1,float16,float16,0,0.10965866843859355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,64,128,1,float16,float16,0,0.07164266705513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,1,128,1,float16,float16,0,0.06118399898211161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,1,128,1,fp8,fp8,0,0.06612800061702728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,2,128,1,float16,float16,0,0.0613013356924057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,64,128,1,float16,fp8,0,0.070933332045873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,1,128,1,float16,fp8,0,0.06256533165772755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,64,128,1,fp8,fp8,0,0.07880533238252004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,2,128,1,float16,fp8,0,0.06196266909440359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,2,128,1,fp8,fp8,0,0.06598933537801106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,4,128,1,float16,float16,0,0.06185600161552429
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,4,128,1,float16,fp8,0,0.0617386649052302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,4,128,1,fp8,fp8,0,0.06788266698519389
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,8,128,1,float16,float16,0,0.0631573349237442
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,8,128,1,fp8,fp8,0,0.06950399776299794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,64,8,128,1,float16,fp8,0,0.06283199787139893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,64,128,1,float16,float16,0,0.039706667264302574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,64,128,1,float16,fp8,0,0.039173332353432976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,64,128,1,fp8,fp8,0,0.04626133541266123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,1,128,1,float16,float16,0,0.036874666810035706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,1,128,1,fp8,fp8,0,0.040175999204317726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,2,128,1,float16,float16,0,0.0374293327331543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,2,128,1,float16,fp8,0,0.03758399933576584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,1,128,1,float16,fp8,0,0.037802666425704956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,4,128,1,float16,float16,0,0.03755733370780945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,2,128,1,fp8,fp8,0,0.04057066639264425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,4,128,1,float16,fp8,0,0.03839466720819473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,4,128,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,8,128,1,float16,float16,0,0.03835200021664301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,8,128,1,float16,fp8,0,0.03857066730658213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,64,8,128,1,fp8,fp8,0,0.04253333310286204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,64,128,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,1,128,1,float16,float16,0,0.02569066733121872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,1,128,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,64,128,1,float16,float16,0,0.027029333015282948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,1,128,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,2,128,1,float16,float16,0,0.025733334322770435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,64,128,1,fp8,fp8,0,0.02903466671705246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,2,128,1,float16,fp8,0,0.026176000634829204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,2,128,1,fp8,fp8,0,0.027242665489514668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,4,128,1,float16,fp8,0,0.026341333985328674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,8,128,1,float16,float16,0,0.026613332331180573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,8,128,1,float16,fp8,0,0.026357332865397137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,4,128,1,float16,float16,0,0.02622933437426885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,64,128,1,float16,float16,0,0.02096533278624217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,64,128,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,8,128,1,fp8,fp8,0,0.027834666272004444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,64,4,128,1,fp8,fp8,0,0.02826666583617528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,64,128,1,fp8,fp8,0,0.02161066730817159
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,1,128,1,float16,float16,0,0.019941333681344986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,1,128,1,float16,fp8,0,0.01998399943113327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,2,128,1,float16,fp8,0,0.01971199984351794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,2,128,1,float16,float16,0,0.019632000476121902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,1,128,1,fp8,fp8,0,0.02037866661945979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,2,128,1,fp8,fp8,0,0.02032533288002014
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,4,128,1,float16,fp8,0,0.02037866661945979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,4,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,8,128,1,float16,float16,0,0.02032533288002014
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,8,128,1,float16,fp8,0,0.019914666811625164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,4,128,1,float16,float16,0,0.019893333315849304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,64,8,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,64,128,1,float16,float16,0,0.018543999642133713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,1,128,1,float16,float16,0,0.01823466643691063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,64,128,1,fp8,fp8,0,0.019834666202465694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,1,128,1,float16,fp8,0,0.018239999810854595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,1,128,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,64,128,1,float16,fp8,0,0.018485333770513535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,2,128,1,float16,fp8,0,0.01833600054184596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,2,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,2,128,1,float16,float16,0,0.017781333376963932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,4,128,1,float16,float16,0,0.018618666877349217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,4,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,4,128,1,float16,fp8,0,0.018405333161354065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,8,128,1,float16,float16,0,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,8,128,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,64,8,128,1,float16,fp8,0,0.018645333747069042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,1,128,1,float16,float16,0,0.5036213397979736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,1,128,1,fp8,fp8,0,0.6456693410873413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,2,128,1,float16,fp8,0,0.509989341100057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,2,128,1,float16,float16,0,0.5089439948399862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,2,128,1,fp8,fp8,0,0.6494773228963217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,1,128,1,float16,fp8,0,0.5041973193486532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,4,128,1,float16,float16,0,0.5195146799087524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,4,128,1,float16,fp8,0,0.5188479820887247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,4,128,1,fp8,fp8,0,0.6569919983545939
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,8,128,1,float16,float16,0,0.5302026669184366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,8,128,1,float16,fp8,0,0.5302453438440958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,64,8,128,1,fp8,fp8,0,0.6657600005467733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,1,128,1,float16,float16,0,0.2615999976793925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,64,128,1,float16,float16,0,0.29231999317804974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,64,128,1,fp8,fp8,0,0.36894933382670086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,1,128,1,float16,fp8,0,0.2621013323465983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,64,128,1,float16,fp8,0,0.2871413429578145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,1,128,1,fp8,fp8,0,0.3360319932301839
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,2,128,1,float16,float16,0,0.26310932636260986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,2,128,1,float16,fp8,0,0.26293333371480304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,4,128,1,float16,fp8,0,0.2679413358370463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,4,128,1,float16,float16,0,0.2669493357340495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,2,128,1,fp8,fp8,0,0.3375466664632161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,4,128,1,fp8,fp8,0,0.3410559892654419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,8,128,1,float16,fp8,0,0.2735466758410136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,8,128,1,float16,float16,0,0.27446399132410687
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,64,8,128,1,fp8,fp8,0,0.346288005510966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,1,128,1,float16,fp8,0,0.13942399621009827
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,64,128,1,float16,float16,0,0.1564853290716807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,64,128,1,float16,fp8,0,0.15260266264279684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,64,128,1,fp8,fp8,0,0.19590934117635092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,1,128,1,float16,float16,0,0.13893333077430725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,1,128,1,fp8,fp8,0,0.1807039976119995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,2,128,1,float16,fp8,0,0.14045332868893942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,2,128,1,float16,float16,0,0.1398240029811859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,2,128,1,fp8,fp8,0,0.18075199921925864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,4,128,1,float16,float16,0,0.14082133769989014
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,4,128,1,float16,fp8,0,0.14239466190338135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,4,128,1,fp8,fp8,0,0.18146665891011557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,8,128,1,float16,fp8,0,0.14485333363215128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,8,128,1,fp8,fp8,0,0.18199467658996582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,64,8,128,1,float16,float16,0,0.14416000247001648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,1,128,1,float16,fp8,0,0.07604800164699554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,64,128,1,float16,float16,0,0.08922666311264038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,1,128,1,float16,float16,0,0.0765173335870107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,64,128,1,float16,fp8,0,0.08786666393280029
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,64,128,1,fp8,fp8,0,0.10994133353233337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,1,128,1,fp8,fp8,0,0.09797333677609761
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,2,128,1,float16,float16,0,0.07606933514277141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,2,128,1,float16,fp8,0,0.07699200014273326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,2,128,1,fp8,fp8,0,0.09852799773216248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,4,128,1,float16,fp8,0,0.07683733105659485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,4,128,1,fp8,fp8,0,0.09850666920344035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,8,128,1,float16,float16,0,0.0779306689898173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,8,128,1,float16,fp8,0,0.07819200058778127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,4,128,1,float16,float16,0,0.07612800101439159
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,64,8,128,1,fp8,fp8,0,0.10054399569829305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,1,128,1,float16,float16,0,0.04670399924119314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,1,128,1,float16,fp8,0,0.046336000164349876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,64,128,1,float16,float16,0,0.04688533147176107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,1,128,1,fp8,fp8,0,0.05648000041643778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,64,128,1,fp8,fp8,0,0.06115200122197469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,2,128,1,float16,fp8,0,0.04580266773700714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,64,128,1,float16,fp8,0,0.04620266457398733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,2,128,1,float16,float16,0,0.04640000065167745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,2,128,1,fp8,fp8,0,0.05649599929650625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,4,128,1,float16,float16,0,0.046575998266537987
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,4,128,1,fp8,fp8,0,0.058229332168896995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,8,128,1,float16,float16,0,0.04743466774622599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,8,128,1,float16,fp8,0,0.046906664967536926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,4,128,1,float16,fp8,0,0.04730666677157084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,64,8,128,1,fp8,fp8,0,0.05797333518664042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,64,128,1,float16,fp8,0,0.030794667700926464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,64,128,1,float16,float16,0,0.030016000072161358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,64,128,1,fp8,fp8,0,0.036805334190527596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,1,128,1,float16,float16,0,0.029989334444204967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,1,128,1,float16,fp8,0,0.030229332546393078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,1,128,1,fp8,fp8,0,0.03465066601832708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,2,128,1,float16,fp8,0,0.029792000850041706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,2,128,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,2,128,1,float16,float16,0,0.029882666965325672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,4,128,1,float16,fp8,0,0.030832000076770782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,8,128,1,float16,float16,0,0.03035733352104823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,4,128,1,fp8,fp8,0,0.036042665441830955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,4,128,1,float16,float16,0,0.03018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,8,128,1,fp8,fp8,0,0.035573333501815796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,64,128,1,float16,float16,0,0.02257599929968516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,64,8,128,1,float16,fp8,0,0.030165334542592365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,64,128,1,fp8,fp8,0,0.02587733417749405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,1,128,1,float16,float16,0,0.021957332889238994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,1,128,1,float16,fp8,0,0.022202665607134502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,64,128,1,float16,fp8,0,0.022175999979178112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,2,128,1,float16,float16,0,0.021802666286627453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,2,128,1,float16,fp8,0,0.02181333303451538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,4,128,1,float16,float16,0,0.021562665700912476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,2,128,1,fp8,fp8,0,0.02442666639884313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,4,128,1,float16,fp8,0,0.022346665461858112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,4,128,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,1,128,1,fp8,fp8,0,0.02476266771554947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,8,128,1,float16,fp8,0,0.02214933435122172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,8,128,1,fp8,fp8,0,0.02515200028816859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,64,8,128,1,float16,float16,0,0.021882665654023487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,64,128,1,float16,float16,0,0.017877332866191864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,64,128,1,fp8,fp8,0,0.02015999952952067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,1,128,1,float16,float16,0,0.017898666361967724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,1,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,64,128,1,float16,fp8,0,0.018426666657129925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,2,128,1,float16,float16,0,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,2,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,1,128,1,float16,fp8,0,0.01812800019979477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,2,128,1,float16,fp8,0,0.018170667191346485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,4,128,1,float16,fp8,0,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,4,128,1,float16,float16,0,0.01782400036851565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,4,128,1,fp8,fp8,0,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,8,128,1,float16,fp8,0,0.018266666680574417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,8,128,1,float16,float16,0,0.01730666682124138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,64,8,128,1,fp8,fp8,0,0.019941333681344986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,64,128,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,1,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,64,128,1,float16,float16,0,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,1,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,1,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,64,128,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,2,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,2,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,2,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,4,128,1,float16,float16,0,0.01669866715868314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,4,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,8,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,8,128,1,fp8,fp8,0,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,4,128,1,float16,fp8,0,0.01773333301146825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,64,8,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,1,128,1,float16,fp8,0,0.43372801939646405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,1,128,1,fp8,fp8,0,0.5713813304901123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,1,128,1,float16,float16,0,0.433845321337382
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,2,128,1,float16,float16,0,0.4361120065053304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,2,128,1,fp8,fp8,0,0.5751039981842041
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,2,128,1,float16,fp8,0,0.4347413380940755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,4,128,1,float16,float16,0,0.43885334332784015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,4,128,1,float16,fp8,0,0.4397226572036743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,4,128,1,fp8,fp8,0,0.5798346598943075
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,8,128,1,float16,float16,0,0.44515732924143475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,8,128,1,fp8,fp8,0,0.5830133358637491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,64,128,1,float16,float16,0,0.23073599735895792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,1,128,1,float16,float16,0,0.22470933198928833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,64,8,128,1,float16,fp8,0,0.4459306796391805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,1,128,1,float16,fp8,0,0.22533865769704184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,64,128,1,float16,fp8,0,0.22690133253733316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,1,128,1,fp8,fp8,0,0.2992639938990275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,64,128,1,fp8,fp8,0,0.3129333257675171
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,2,128,1,float16,float16,0,0.2259733279546102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,2,128,1,float16,fp8,0,0.22610666354497275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,4,128,1,float16,fp8,0,0.22852800289789835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,2,128,1,fp8,fp8,0,0.30008000135421753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,4,128,1,fp8,fp8,0,0.3005173405011495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,4,128,1,float16,float16,0,0.22739199797312418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,8,128,1,float16,float16,0,0.23016534248987833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,8,128,1,fp8,fp8,0,0.3019839922587077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,64,8,128,1,float16,fp8,0,0.23018133640289307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,64,128,1,float16,float16,0,0.1243839959303538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,1,128,1,float16,float16,0,0.12020267049471538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,64,128,1,float16,fp8,0,0.12217600146929423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,1,128,1,fp8,fp8,0,0.1586240033308665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,1,128,1,float16,fp8,0,0.11958400408426921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,2,128,1,float16,float16,0,0.12011733651161194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,64,128,1,fp8,fp8,0,0.16844799121220908
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,2,128,1,fp8,fp8,0,0.1581546664237976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,2,128,1,float16,fp8,0,0.11935999989509583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,4,128,1,float16,float16,0,0.12000000476837158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,4,128,1,float16,fp8,0,0.1209333340326945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,4,128,1,fp8,fp8,0,0.15918933351834616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,8,128,1,float16,fp8,0,0.121370663245519
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,8,128,1,float16,float16,0,0.12031466762224834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,64,128,1,float16,float16,0,0.06567466755708058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,64,8,128,1,fp8,fp8,0,0.1614346702893575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,1,128,1,float16,float16,0,0.06691200037797292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,64,128,1,float16,fp8,0,0.06513600051403046
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,1,128,1,float16,fp8,0,0.0672266681989034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,64,128,1,fp8,fp8,0,0.09346666932106018
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,1,128,1,fp8,fp8,0,0.08806932965914409
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,2,128,1,float16,fp8,0,0.06715733309586842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,2,128,1,float16,float16,0,0.06683200101057689
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,4,128,1,float16,float16,0,0.06772266825040181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,4,128,1,fp8,fp8,0,0.08975999553998311
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,2,128,1,fp8,fp8,0,0.08833600083986919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,8,128,1,float16,float16,0,0.06795733173688252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,8,128,1,float16,fp8,0,0.06804266571998596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,4,128,1,float16,fp8,0,0.06811733543872833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,64,8,128,1,fp8,fp8,0,0.09008533755938213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,64,128,1,float16,fp8,0,0.04027733455101649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,64,128,1,float16,float16,0,0.03938133269548416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,64,128,1,fp8,fp8,0,0.052330667773882546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,1,128,1,float16,fp8,0,0.041696002086003624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,1,128,1,fp8,fp8,0,0.05173333485921224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,1,128,1,float16,float16,0,0.04163199911514918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,2,128,1,float16,fp8,0,0.04146133363246918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,2,128,1,float16,float16,0,0.041834667325019836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,2,128,1,fp8,fp8,0,0.051738664507865906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,4,128,1,float16,fp8,0,0.04193066557248434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,4,128,1,float16,float16,0,0.04156800111134847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,8,128,1,float16,float16,0,0.04283200204372406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,4,128,1,fp8,fp8,0,0.05242133140563965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,8,128,1,fp8,fp8,0,0.0525439977645874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,64,128,1,float16,fp8,0,0.027914665639400482
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,64,128,1,fp8,fp8,0,0.03385599950949351
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,64,8,128,1,float16,fp8,0,0.042447999119758606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,1,128,1,float16,float16,0,0.027317332724730175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,1,128,1,float16,fp8,0,0.027701333165168762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,1,128,1,fp8,fp8,0,0.03259200106064478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,2,128,1,float16,float16,0,0.027786667148272198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,64,128,1,float16,float16,0,0.027493332823117573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,2,128,1,float16,fp8,0,0.028058665494124096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,2,128,1,fp8,fp8,0,0.032501332461833954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,4,128,1,fp8,fp8,0,0.03336533407370249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,4,128,1,float16,float16,0,0.027509334186712902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,4,128,1,float16,fp8,0,0.028090665737787884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,8,128,1,float16,float16,0,0.027893332143624622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,8,128,1,float16,fp8,0,0.02815466622511546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,64,128,1,float16,float16,0,0.02041600023706754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,64,128,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,64,8,128,1,fp8,fp8,0,0.033759998778502144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,64,128,1,fp8,fp8,0,0.024442667762438457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,1,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,1,128,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,1,128,1,float16,float16,0,0.020549333343903225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,2,128,1,float16,float16,0,0.020629333953062694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,2,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,2,128,1,fp8,fp8,0,0.02362666775782903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,4,128,1,float16,fp8,0,0.021520001192887623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,4,128,1,fp8,fp8,0,0.0235359991590182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,8,128,1,float16,float16,0,0.020981334149837494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,4,128,1,float16,float16,0,0.021210665504137676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,8,128,1,fp8,fp8,0,0.023706667125225067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,64,8,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,64,128,1,float16,fp8,0,0.017850667238235474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,64,128,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,1,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,1,128,1,float16,fp8,0,0.01766933376590411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,2,128,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,2,128,1,float16,fp8,0,0.017845333864291508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,4,128,1,float16,float16,0,0.017263999829689663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,2,128,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,1,128,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,4,128,1,float16,fp8,0,0.017701332767804463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,4,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,8,128,1,float16,float16,0,0.017514667163292568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,8,128,1,float16,fp8,0,0.018016000588734944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,64,8,128,1,fp8,fp8,0,0.019573333362738293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,64,128,1,float16,float16,0,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,1,128,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,1,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,1,128,1,fp8,fp8,0,0.018405333161354065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,2,128,1,float16,float16,0,0.016602666427691776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,2,128,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,64,128,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,2,128,1,fp8,fp8,0,0.01801066721479098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,4,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,4,128,1,float16,float16,0,0.0164533331990242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,4,128,1,fp8,fp8,0,0.018298666924238205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,8,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,8,128,1,fp8,fp8,0,0.018751999984184902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,64,8,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,128,1,float16,fp8,0,49.80255126953125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,128,1,fp8,fp8,0,33.10327911376953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,128,1,float16,fp8,0,50.095113118489586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,128,1,fp8,fp8,0,33.419087727864586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,128,1,float16,fp8,0,49.53326924641927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,128,1,float16,float16,0,50.339701334635414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,128,1,float16,float16,0,51.046051025390625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,128,1,float16,float16,0,49.92816162109375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,128,1,float16,float16,0,25.18189239501953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,128,1,fp8,fp8,0,33.12013244628906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,128,1,float16,float16,0,50.21357218424479
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,128,1,fp8,fp8,0,33.61823527018229
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,128,1,float16,fp8,0,49.897216796875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,128,1,float16,fp8,0,25.091654459635418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,128,1,fp8,fp8,0,17.224411010742188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,128,1,float16,float16,0,24.969584147135418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,128,1,float16,fp8,0,25.108970642089844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,128,1,fp8,fp8,0,16.682671864827473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,128,1,fp8,fp8,0,16.732826232910156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,128,1,float16,fp8,0,25.346598307291668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,128,1,float16,float16,0,25.001744588216145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,128,1,float16,float16,0,25.29113515218099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,128,1,float16,fp8,0,25.36964162190755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,128,1,fp8,fp8,0,16.561818440755207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,128,1,float16,float16,0,12.973055521647135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,128,1,float16,fp8,0,12.572682698567709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,128,1,float16,float16,0,25.164944966634113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,128,1,float16,fp8,0,25.04041035970052
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,128,1,fp8,fp8,0,16.784783681233723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,128,1,float16,float16,0,12.868447621663412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,128,1,float16,fp8,0,12.57684326171875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,128,1,fp8,fp8,0,8.61245854695638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,128,1,fp8,fp8,0,8.590415954589844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,128,1,float16,float16,0,12.624202728271484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,128,1,fp8,fp8,0,8.405888239542643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,128,1,float16,float16,0,12.721482594807943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,128,1,float16,fp8,0,12.583717346191406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,128,1,float16,fp8,0,12.800336201985678
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,128,1,fp8,fp8,0,8.389151891072592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,128,1,float16,float16,0,12.77779769897461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,128,1,float16,fp8,0,12.993509928385416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,128,1,fp8,fp8,0,8.514640172322592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,128,1,float16,float16,0,6.523647944132487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,128,1,float16,float16,0,6.343146642049153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,128,1,fp8,fp8,0,4.2421919504801435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,128,1,float16,fp8,0,6.3623199462890625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,128,1,float16,fp8,0,6.343680063883464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,128,1,float16,float16,0,6.477658589680989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,128,1,float16,fp8,0,6.390133539835612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,128,1,fp8,fp8,0,4.299536069234212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,128,1,fp8,fp8,0,4.316485404968262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,128,1,float16,float16,0,6.444741566975911
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,128,1,fp8,fp8,0,4.284666697184245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,128,1,float16,fp8,0,6.1761118570963545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,128,1,float16,float16,0,6.52133305867513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,128,1,fp8,fp8,0,4.254586537679036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,128,1,float16,fp8,0,6.340949376424153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,128,1,float16,float16,0,29.420516967773438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,128,1,fp8,fp8,0,19.468544006347656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,128,1,float16,fp8,0,28.795387268066406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,128,1,float16,float16,0,28.664703369140625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,128,1,float16,fp8,0,28.773348490397137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,128,1,fp8,fp8,0,19.637674967447918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,128,1,float16,float16,0,28.886502583821613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,128,1,float16,fp8,0,29.318862915039062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,128,1,float16,float16,0,14.868597666422525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,128,1,fp8,fp8,0,19.6879145304362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,128,1,float16,float16,0,29.0979741414388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,128,1,fp8,fp8,0,19.25623957316081
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,128,1,float16,fp8,0,28.950485229492188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,128,1,float16,fp8,0,14.738250732421875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,128,1,float16,float16,0,14.57580312093099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,128,1,float16,fp8,0,14.58133316040039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,128,1,fp8,fp8,0,9.966922760009766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,128,1,fp8,fp8,0,10.015354792277018
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,128,1,float16,float16,0,14.581530253092447
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,128,1,float16,fp8,0,14.399424235026041
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,128,1,fp8,fp8,0,9.707306543986002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,128,1,fp8,fp8,0,9.713631947835287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,128,1,float16,fp8,0,14.618464152018229
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,128,1,float16,float16,0,14.827946980794271
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,128,1,float16,float16,0,14.797167460123697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,128,1,float16,fp8,0,14.407498677571615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,128,1,float16,float16,0,7.284981409708659
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,128,1,float16,fp8,0,7.481039683024089
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,128,1,fp8,fp8,0,9.785482406616211
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,128,1,fp8,fp8,0,4.924720128377278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,128,1,float16,float16,0,7.29917844136556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,128,1,float16,float16,0,7.110293070475261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,128,1,float16,fp8,0,7.294991811116536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,128,1,fp8,fp8,0,4.9839785893758135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,128,1,float16,fp8,0,7.411845525105794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,128,1,fp8,fp8,0,4.970810572306315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,128,1,float16,fp8,0,7.3572743733723955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,128,1,float16,float16,0,7.4119307200113935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,128,1,fp8,fp8,0,4.827712059020996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,128,1,float16,float16,0,7.3591734568278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,128,1,float16,float16,0,3.7004639307657876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,128,1,float16,fp8,0,7.252981185913086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,128,1,fp8,fp8,0,5.074202537536621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,128,1,float16,fp8,0,3.666229248046875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,128,1,float16,float16,0,3.6901652018229165
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,128,1,float16,fp8,0,3.6561813354492188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,128,1,fp8,fp8,0,2.571743965148926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,128,1,fp8,fp8,0,2.618586699167887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,128,1,float16,fp8,0,3.61354128519694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,128,1,float16,float16,0,3.6457974116007485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,128,1,fp8,fp8,0,2.5823094050089517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,128,1,float16,float16,0,3.6343040466308594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,128,1,fp8,fp8,0,2.5843040148417153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,128,1,float16,fp8,0,3.6304054260253906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,128,1,float16,float16,0,3.6466614405314126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,128,1,float16,fp8,0,3.739717483520508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,128,1,fp8,fp8,0,2.588688055674235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,128,1,float16,fp8,0,20.84380849202474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,128,1,float16,float16,0,20.251370747884113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,128,1,fp8,fp8,0,13.869375864664713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,128,1,float16,fp8,0,20.553311665852863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,128,1,float16,float16,0,20.585243225097656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,128,1,fp8,fp8,0,13.926778157552084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,128,1,float16,float16,0,20.716373443603516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,128,1,float16,fp8,0,20.456314086914062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,128,1,float16,float16,0,10.430714925130209
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,128,1,fp8,fp8,0,14.244346618652344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,128,1,float16,float16,0,20.61204783121745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,128,1,fp8,fp8,0,14.030586242675781
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,128,1,float16,fp8,0,10.399328231811523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,128,1,float16,float16,0,10.37283198038737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,128,1,float16,fp8,0,10.397893269856771
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,128,1,float16,fp8,0,20.26854960123698
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,128,1,fp8,fp8,0,7.216021219889323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,128,1,fp8,fp8,0,6.854442596435547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,128,1,float16,fp8,0,10.254032135009766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,128,1,fp8,fp8,0,6.924453099568685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,128,1,float16,float16,0,10.207418441772461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,128,1,float16,float16,0,10.262186686197916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,128,1,float16,fp8,0,10.210549036661783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,128,1,fp8,fp8,0,7.028474807739258
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,128,1,float16,float16,0,10.516634623209635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,128,1,float16,float16,0,5.146213213602702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,128,1,float16,fp8,0,5.102383931477864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,128,1,fp8,fp8,0,7.034837086995442
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,128,1,float16,fp8,0,10.5502560933431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,128,1,fp8,fp8,0,3.5266240437825522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,128,1,float16,float16,0,5.252655982971191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,128,1,float16,fp8,0,5.087461471557617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,128,1,float16,float16,0,4.956048011779785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,128,1,fp8,fp8,0,3.610666592915853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,128,1,float16,fp8,0,5.112256050109863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,128,1,fp8,fp8,0,3.5063788096110025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,128,1,float16,fp8,0,4.99454402923584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,128,1,float16,float16,0,5.299983978271484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,128,1,fp8,fp8,0,3.516218821207682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,128,1,float16,fp8,0,5.068458557128906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,128,1,float16,float16,0,5.175296147664388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,128,1,fp8,fp8,0,3.5247360865275064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,128,1,float16,float16,0,2.6707468032836914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,128,1,float16,fp8,0,2.6039466857910156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,128,1,float16,float16,0,2.6176212628682456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,128,1,float16,fp8,0,2.6243626276652017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,128,1,fp8,fp8,0,1.8933547337849934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,128,1,float16,float16,0,2.6341546376546225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,128,1,fp8,fp8,0,1.9302026430765789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,128,1,fp8,fp8,0,1.8965919812520344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,128,1,float16,fp8,0,2.599514643351237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,128,1,float16,float16,0,2.6381759643554688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,128,1,float16,fp8,0,2.603498617808024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,128,1,fp8,fp8,0,1.9033172925313313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,128,1,float16,float16,0,2.6342026392618814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,128,1,float16,fp8,0,2.611018657684326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,128,1,fp8,fp8,0,1.907551924387614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,128,1,fp8,fp8,0,18.507979075113933
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,128,1,float16,float16,0,27.5439936319987
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,128,1,float16,fp8,0,26.781893412272137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,128,1,float16,fp8,0,26.89796193440755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,128,1,fp8,fp8,0,18.45788828531901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,128,1,float16,float16,0,26.995465596516926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,128,1,float16,float16,0,26.908121744791668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,128,1,float16,fp8,0,27.046485900878906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,128,1,float16,float16,0,13.820330301920572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,128,1,fp8,fp8,0,18.715941111246746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,128,1,float16,fp8,0,26.87388865152995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,128,1,float16,float16,0,27.65882619222005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,128,1,float16,fp8,0,13.717946370442709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,128,1,fp8,fp8,0,18.64268747965495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,128,1,float16,float16,0,13.595989227294922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,128,1,float16,fp8,0,13.821450551350912
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,128,1,fp8,fp8,0,9.36086400349935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,128,1,fp8,fp8,0,9.646474838256836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,128,1,float16,float16,0,13.632085164388021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,128,1,float16,fp8,0,13.760458628336588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,128,1,fp8,fp8,0,9.432842890421549
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,128,1,float16,float16,0,13.562554677327475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,128,1,fp8,fp8,0,9.322528203328451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,128,1,float16,fp8,0,13.406436920166016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,128,1,float16,float16,0,13.689584096272787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,128,1,float16,fp8,0,13.487135569254557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,128,1,float16,float16,0,6.737599690755208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,128,1,float16,fp8,0,6.727589289347331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,128,1,fp8,fp8,0,9.476677576700846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,128,1,fp8,fp8,0,4.58182938893636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,128,1,float16,float16,0,6.975664138793945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,128,1,float16,fp8,0,6.886938730875651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,128,1,fp8,fp8,0,4.780943870544434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,128,1,float16,float16,0,6.74391492207845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,128,1,fp8,fp8,0,4.593898773193359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,128,1,float16,fp8,0,6.4579518636067705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,128,1,float16,float16,0,6.612277348836263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,128,1,float16,fp8,0,6.522789637247722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,128,1,fp8,fp8,0,4.593242645263672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,128,1,float16,float16,0,6.655877431233724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,128,1,fp8,fp8,0,4.6505387624104815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,128,1,float16,fp8,0,6.75596809387207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,128,1,float16,float16,0,3.353290557861328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,128,1,float16,float16,0,3.330826759338379
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,128,1,float16,fp8,0,3.316800117492676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,128,1,float16,fp8,0,3.3891468048095703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,128,1,fp8,fp8,0,2.3948799769083657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,128,1,float16,float16,0,3.318965276082357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,128,1,float16,fp8,0,3.322357177734375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,128,1,fp8,fp8,0,2.478282610575358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,128,1,fp8,fp8,0,2.3953706423441568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,128,1,float16,float16,0,3.331658681233724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,128,1,float16,fp8,0,3.282464027404785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,128,1,fp8,fp8,0,2.4064159393310547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,128,1,float16,float16,0,3.368581453959147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,128,1,float16,float16,0,1.7832159996032715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,128,1,float16,fp8,0,1.7773653666178386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,128,1,float16,fp8,0,3.326682726542155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,128,1,fp8,fp8,0,2.4133493105570474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,128,1,float16,float16,0,1.7816425959269206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,128,1,float16,fp8,0,1.7614879608154297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,128,1,fp8,fp8,0,1.350719928741455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,128,1,fp8,fp8,0,1.3139839967091878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,128,1,float16,float16,0,1.788394610087077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,128,1,float16,fp8,0,1.779706637064616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,128,1,fp8,fp8,0,1.317850669225057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,128,1,float16,float16,0,1.795749346415202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,128,1,float16,fp8,0,1.7633760770161946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,128,1,fp8,fp8,0,1.3166186809539795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,128,1,float16,float16,0,1.7929706573486328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,128,1,float16,fp8,0,1.7842613855997722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,128,1,fp8,fp8,0,1.3209226926167805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,128,1,float16,float16,0,16.204293568929035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,128,1,fp8,fp8,0,11.17419687906901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,128,1,float16,fp8,0,15.85378646850586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,128,1,float16,float16,0,15.975140889485678
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,128,1,float16,fp8,0,15.723995208740234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,128,1,fp8,fp8,0,11.221968332926432
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,128,1,float16,fp8,0,15.78979746500651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,128,1,float16,float16,0,16.175615946451824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,128,1,float16,float16,0,8.105077107747396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,128,1,fp8,fp8,0,11.457344055175781
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,128,1,float16,float16,0,16.083141326904297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,128,1,float16,fp8,0,7.9491628011067705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,128,1,float16,fp8,0,16.01531728108724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,128,1,fp8,fp8,0,11.342100779215494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,128,1,float16,fp8,0,8.225525538126627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,128,1,float16,float16,0,8.026784261067709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,128,1,fp8,fp8,0,5.845296223958333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,128,1,fp8,fp8,0,5.6588694254557295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,128,1,float16,float16,0,7.920709609985352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,128,1,fp8,fp8,0,5.5278879801432295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,128,1,float16,fp8,0,7.909066518147786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,128,1,float16,float16,0,8.052495956420898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,128,1,float16,fp8,0,7.899887720743815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,128,1,fp8,fp8,0,5.50714111328125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,128,1,float16,float16,0,7.963754653930664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,128,1,float16,fp8,0,8.03712526957194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,128,1,float16,float16,0,3.8379945755004883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,128,1,float16,fp8,0,3.7675625483194985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,128,1,fp8,fp8,0,5.59446398417155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,128,1,fp8,fp8,0,2.81172784169515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,128,1,float16,float16,0,3.887056032816569
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,128,1,float16,fp8,0,4.0364640553792315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,128,1,float16,float16,0,3.928463935852051
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,128,1,fp8,fp8,0,2.9647839864095054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,128,1,float16,fp8,0,3.9258292516072593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,128,1,fp8,fp8,0,2.8173440297444663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,128,1,float16,float16,0,3.8936640421549478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,128,1,fp8,fp8,0,2.825455983479818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,128,1,float16,fp8,0,3.7855733235677085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,128,1,float16,float16,0,3.9186506271362305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,128,1,fp8,fp8,0,2.8428427378336587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,128,1,float16,fp8,0,3.985952059427897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,128,1,float16,float16,0,2.0063573519388833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,128,1,float16,fp8,0,1.975061257680257
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,128,1,float16,float16,0,2.0313013394673667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,128,1,float16,fp8,0,2.000784079233805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,128,1,fp8,fp8,0,1.4938987096150715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,128,1,float16,float16,0,2.011237303415934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,128,1,float16,fp8,0,2.0014826456705728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,128,1,fp8,fp8,0,1.559871991475423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,128,1,fp8,fp8,0,1.4971946080525715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,128,1,float16,float16,0,2.009199937184652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,128,1,float16,fp8,0,1.9883413314819336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,128,1,fp8,fp8,0,1.4984532992045085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,128,1,float16,float16,0,2.0372427304585776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,128,1,float16,fp8,0,1.9920585950215657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,128,1,float16,float16,0,1.1019573211669922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,128,1,float16,fp8,0,1.0818346341451008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,128,1,float16,float16,0,1.1107093493143718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,128,1,fp8,fp8,0,0.8391146659851074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,128,1,float16,fp8,0,1.0936266581217449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,128,1,float16,float16,0,1.1051733493804932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,128,1,fp8,fp8,0,0.8642026583353678
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,128,1,fp8,fp8,0,1.509749412536621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,128,1,float16,fp8,0,1.086016019185384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,128,1,fp8,fp8,0,0.8376320203145345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,128,1,float16,float16,0,1.1081120173136394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,128,1,float16,fp8,0,1.0930026372273762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,128,1,fp8,fp8,0,0.8401920000712076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,128,1,float16,float16,0,1.108405351638794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,128,1,float16,fp8,0,1.0882186889648438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,128,1,fp8,fp8,0,0.8458720048268636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,128,1,float16,float16,0,15.303162892659506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,128,1,float16,fp8,0,15.273760477701822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,128,1,fp8,fp8,0,11.163450876871744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,128,1,float16,float16,0,15.611194610595703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,128,1,float16,fp8,0,15.348373413085938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,128,1,fp8,fp8,0,11.19381332397461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,128,1,float16,float16,0,15.512004852294922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,128,1,float16,fp8,0,15.606826782226562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,128,1,float16,float16,0,7.92523193359375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,128,1,fp8,fp8,0,11.285050710042318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,128,1,float16,float16,0,15.66702397664388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,128,1,float16,fp8,0,15.42855962117513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,128,1,fp8,fp8,0,11.509482065836588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,128,1,float16,fp8,0,7.79417610168457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,128,1,float16,float16,0,7.8891251881917315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,128,1,float16,fp8,0,7.917903900146484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,128,1,fp8,fp8,0,5.583744049072266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,128,1,fp8,fp8,0,5.9354400634765625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,128,1,float16,float16,0,7.653909047444661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,128,1,float16,fp8,0,7.728357315063477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,128,1,fp8,fp8,0,5.6065012613932295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,128,1,float16,float16,0,7.878400166829427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,128,1,fp8,fp8,0,5.621290842692058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,128,1,float16,fp8,0,7.750826517740886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,128,1,float16,float16,0,7.761925379435222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,128,1,float16,fp8,0,7.769877115885417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,128,1,float16,float16,0,3.69596258799235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,128,1,float16,fp8,0,3.6352532704671225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,128,1,fp8,fp8,0,5.642127990722656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,128,1,float16,float16,0,3.8392747243245444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,128,1,fp8,fp8,0,2.8280747731526694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,128,1,float16,float16,0,3.719872156778971
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,128,1,float16,fp8,0,3.9403680165608725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,128,1,fp8,fp8,0,2.827962557474772
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,128,1,float16,fp8,0,3.7072426478068032
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,128,1,fp8,fp8,0,3.02349853515625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,128,1,float16,float16,0,3.770965258280436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,128,1,float16,fp8,0,3.73468812306722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,128,1,fp8,fp8,0,2.8420372009277344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,128,1,float16,float16,0,3.7692267100016275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,128,1,fp8,fp8,0,2.8637173970540366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,128,1,float16,float16,0,1.918549378712972
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,128,1,float16,fp8,0,3.704085350036621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,128,1,float16,fp8,0,1.8807679812113445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,128,1,float16,float16,0,1.9581173261006672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,128,1,float16,fp8,0,1.9731094042460124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,128,1,fp8,fp8,0,1.4721760749816895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,128,1,float16,float16,0,1.9149600664774578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,128,1,fp8,fp8,0,1.5797813733418782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,128,1,fp8,fp8,0,1.4766346613566081
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,128,1,float16,fp8,0,1.8900319735209148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,128,1,float16,float16,0,1.9183573722839355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,128,1,float16,fp8,0,1.8871253331502278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,128,1,fp8,fp8,0,1.4831627209981282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,128,1,float16,float16,0,1.9254612922668457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,128,1,float16,float16,0,1.0273386637369792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,128,1,float16,fp8,0,1.0094239711761475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,128,1,float16,fp8,0,1.8955200513203938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,128,1,fp8,fp8,0,0.7990453243255615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,128,1,fp8,fp8,0,1.4900320370992024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,128,1,float16,fp8,0,1.0374186833699544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,128,1,float16,float16,0,1.0511146386464436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,128,1,float16,float16,0,1.028048038482666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,128,1,float16,fp8,0,1.011354684829712
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,128,1,fp8,fp8,0,0.8003040154774984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,128,1,float16,float16,0,1.0270506540934246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,128,1,float16,fp8,0,1.006661335627238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,128,1,fp8,fp8,0,0.8023893038431803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,128,1,fp8,fp8,0,0.8474666277567545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,128,1,float16,float16,0,1.0350773334503174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,128,1,float16,fp8,0,1.0121813615163167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,128,1,fp8,fp8,0,0.8091093699137369
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,128,1,float16,float16,0,0.5822986761728922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,128,1,fp8,fp8,0,0.44143466154734295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,128,1,float16,float16,0,0.5849599838256836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,128,1,float16,fp8,0,0.5701333284378052
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,128,1,fp8,fp8,0,0.46541865666707355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,128,1,float16,fp8,0,0.5770453214645386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,128,1,float16,float16,0,0.5843573411305746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,128,1,float16,fp8,0,0.5729920069376627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,128,1,fp8,fp8,0,0.4419413407643636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,128,1,float16,float16,0,0.5850773255030314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,128,1,float16,fp8,0,0.5733760197957357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,128,1,float16,fp8,0,0.5741333166758219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,128,1,float16,float16,0,0.5840266545613607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,128,1,fp8,fp8,0,0.442848006884257
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,128,1,fp8,fp8,0,0.44658132394154865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,128,1,float16,float16,0,9.53764279683431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,128,1,float16,fp8,0,9.222000122070312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,128,1,float16,float16,0,9.423226674397787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,128,1,float16,fp8,0,9.319125493367514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,128,1,fp8,fp8,0,7.080650965372722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,128,1,fp8,fp8,0,7.149402618408203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,128,1,float16,fp8,0,9.353706359863281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,128,1,float16,float16,0,9.5622189839681
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,128,1,fp8,fp8,0,7.167664210001628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,128,1,float16,float16,0,4.555610656738281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,128,1,float16,float16,0,9.381439844767252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,128,1,float16,fp8,0,9.294362386067709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,128,1,fp8,fp8,0,7.157087961832683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,128,1,float16,float16,0,4.812277475992839
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,128,1,float16,fp8,0,4.7698666254679365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,128,1,fp8,fp8,0,3.8396533330281577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,128,1,float16,fp8,0,4.46399466196696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,128,1,fp8,fp8,0,3.5494826634724936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,128,1,float16,float16,0,4.5229387283325195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,128,1,float16,fp8,0,4.489850680033366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,128,1,fp8,fp8,0,3.555370648701986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,128,1,float16,float16,0,4.6234134038289385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,128,1,float16,fp8,0,4.534912109375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,128,1,fp8,fp8,0,3.5736586252848306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,128,1,float16,float16,0,4.626698811848958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,128,1,float16,fp8,0,4.566725413004558
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,128,1,float16,float16,0,2.3048106829325357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,128,1,fp8,fp8,0,3.605440139770508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,128,1,float16,fp8,0,2.246895949045817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,128,1,fp8,fp8,0,1.8165067036946614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,128,1,float16,float16,0,2.3807573318481445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,128,1,float16,fp8,0,2.3562240600585938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,128,1,float16,float16,0,2.2960480054219565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,128,1,float16,fp8,0,2.2666667302449546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,128,1,fp8,fp8,0,1.8200213114420574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,128,1,fp8,fp8,0,1.9669067064921062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,128,1,float16,float16,0,2.2997652689615884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,128,1,float16,fp8,0,2.2750986417134604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,128,1,fp8,fp8,0,1.827797253926595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,128,1,float16,float16,0,2.3060426712036133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,128,1,float16,fp8,0,2.2704480489095054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,128,1,float16,float16,0,1.202677329381307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,128,1,fp8,fp8,0,1.8435680071512859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,128,1,float16,fp8,0,1.1790986855824788
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,128,1,float16,float16,0,1.245136022567749
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,128,1,float16,fp8,0,1.2216959794362385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,128,1,fp8,fp8,0,0.956554651260376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,128,1,fp8,fp8,0,1.0462933381398518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,128,1,float16,float16,0,1.1994240283966064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,128,1,float16,fp8,0,1.1817599932352703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,128,1,fp8,fp8,0,0.9595946470896403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,128,1,float16,float16,0,1.203770637512207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,128,1,float16,fp8,0,1.1821226278940837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,128,1,fp8,fp8,0,0.9618879954020182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,128,1,float16,float16,0,1.204954703648885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,128,1,float16,fp8,0,1.1872373421986897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,128,1,fp8,fp8,0,0.9710240364074707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,128,1,float16,float16,0,0.6537866592407227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,128,1,float16,fp8,0,0.6367733478546143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,128,1,float16,float16,0,0.6714933713277181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,128,1,float16,fp8,0,0.664576013882955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,128,1,fp8,fp8,0,0.5271519819895426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,128,1,float16,float16,0,0.6562079985936483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,128,1,fp8,fp8,0,0.5658186674118042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,128,1,float16,fp8,0,0.6403573354085287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,128,1,fp8,fp8,0,0.5297600030899048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,128,1,float16,float16,0,0.655514677365621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,128,1,float16,fp8,0,0.6431039969126383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,128,1,fp8,fp8,0,0.5306186676025391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,128,1,float16,float16,0,0.6569386720657349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,128,1,float16,fp8,0,0.6440746784210205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,128,1,fp8,fp8,0,0.5353279908498129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,128,1,float16,float16,0,0.38092267513275146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,128,1,float16,float16,0,0.3853280146916707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,128,1,float16,fp8,0,0.3715733289718628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,128,1,float16,fp8,0,0.3823253313700358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,128,1,fp8,fp8,0,0.31569067637125653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,128,1,float16,float16,0,0.38146666685740155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,128,1,float16,fp8,0,0.3720533450444539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,128,1,fp8,fp8,0,0.29606399933497113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,128,1,fp8,fp8,0,0.29717334111531574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,128,1,float16,fp8,0,0.3750986655553182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,128,1,float16,float16,0,0.3825013240178426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,128,1,fp8,fp8,0,0.2982826630274455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,128,1,float16,float16,0,0.38307734330495197
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,128,1,float16,fp8,0,0.3751680056254069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,128,1,fp8,fp8,0,0.2992159922917684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,128,1,float16,float16,0,9.748640060424805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,128,1,float16,fp8,0,9.742975870768229
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,128,1,fp8,fp8,0,7.803258895874023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,128,1,float16,fp8,0,9.419626871744791
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,128,1,fp8,fp8,0,7.786944071451823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,128,1,float16,float16,0,9.697984059651693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,128,1,float16,float16,0,9.73416519165039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,128,1,float16,fp8,0,9.484549204508463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,128,1,float16,float16,0,4.686202685038249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,128,1,float16,float16,0,9.910858790079752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,128,1,fp8,fp8,0,7.802741368611653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,128,1,float16,fp8,0,4.641034762064616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,128,1,float16,fp8,0,9.66433588663737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,128,1,float16,float16,0,4.929210662841797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,128,1,fp8,fp8,0,7.881925582885742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,128,1,float16,fp8,0,4.9572798411051435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,128,1,fp8,fp8,0,4.273802757263184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,128,1,fp8,fp8,0,3.8871892293294272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,128,1,float16,float16,0,4.6954294840494795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,128,1,float16,fp8,0,4.592149416605632
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,128,1,float16,float16,0,4.708287874857585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,128,1,fp8,fp8,0,3.89244810740153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,128,1,float16,fp8,0,4.585461298624675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,128,1,fp8,fp8,0,3.9044265747070312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,128,1,float16,float16,0,4.827605247497559
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,128,1,float16,float16,0,2.3511786460876465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,128,1,float16,fp8,0,2.293018658955892
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,128,1,float16,fp8,0,4.639109293619792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,128,1,fp8,fp8,0,3.9408960342407227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,128,1,fp8,fp8,0,1.9507253964742024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,128,1,float16,float16,0,2.484565258026123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,128,1,float16,float16,0,2.357386589050293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,128,1,float16,fp8,0,2.296741326649984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,128,1,fp8,fp8,0,1.9579359690348308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,128,1,float16,fp8,0,2.4448320070902505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,128,1,float16,float16,0,2.3572746912638345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,128,1,fp8,fp8,0,2.1720053354899087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,128,1,float16,fp8,0,2.305845260620117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,128,1,fp8,fp8,0,1.9687306086222331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,128,1,float16,float16,0,2.3750294049580893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,128,1,float16,fp8,0,2.322970708211263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,128,1,float16,float16,0,1.2148746649424236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,128,1,float16,fp8,0,1.1856373151143391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,128,1,fp8,fp8,0,1.989770730336507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,128,1,float16,float16,0,1.2695999940236409
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,128,1,fp8,fp8,0,1.0156266689300537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,128,1,float16,float16,0,1.2203093369801838
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,128,1,fp8,fp8,0,1.1205066839853923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,128,1,float16,fp8,0,1.2593973477681477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,128,1,fp8,fp8,0,1.0122186342875164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,128,1,float16,fp8,0,1.1880213419596355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,128,1,float16,float16,0,1.223130702972412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,128,1,float16,fp8,0,1.1898399988810222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,128,1,fp8,fp8,0,1.0204799969991047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,128,1,float16,float16,0,1.2249066829681396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,128,1,float16,fp8,0,1.2023200194040935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,128,1,fp8,fp8,0,1.0262347062428792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,128,1,float16,float16,0,0.6726079781850179
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,128,1,float16,fp8,0,0.6662559906641642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,128,1,float16,float16,0,0.6444373528162638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,128,1,fp8,fp8,0,0.602133313814799
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,128,1,fp8,fp8,0,0.5460160175959269
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,128,1,float16,fp8,0,0.6307040055592855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,128,1,float16,float16,0,0.6463359991709391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,128,1,float16,fp8,0,0.6317439874013265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,128,1,fp8,fp8,0,0.546064019203186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,128,1,float16,float16,0,0.6488853295644125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,128,1,float16,fp8,0,0.6332053343454996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,128,1,fp8,fp8,0,0.548314650853475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,128,1,float16,float16,0,0.653498649597168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,128,1,float16,fp8,0,0.6388800144195557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,128,1,float16,float16,0,0.36051201820373535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,128,1,fp8,fp8,0,0.5538026491800944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,128,1,float16,float16,0,0.3730826775232951
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,128,1,float16,fp8,0,0.3519093195597331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,128,1,float16,fp8,0,0.36817065874735516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,128,1,fp8,fp8,0,0.29498666524887085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,128,1,float16,float16,0,0.36373333136240643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,128,1,float16,fp8,0,0.3515626589457194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,128,1,fp8,fp8,0,0.2948746681213379
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,128,1,float16,float16,0,0.36371199289957684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,128,1,fp8,fp8,0,0.3285013238588969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,128,1,float16,fp8,0,0.35530134042104083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,128,1,fp8,fp8,0,0.2967200080553691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,128,1,float16,float16,0,0.36667199929555255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,128,1,float16,fp8,0,0.3570133447647095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,128,1,fp8,fp8,0,0.2982133428255717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,128,1,float16,float16,0,0.20247999827067056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,128,1,float16,fp8,0,0.1965706745783488
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,128,1,float16,float16,0,0.21371199687321982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,128,1,float16,fp8,0,0.2090346614519755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,128,1,fp8,fp8,0,0.19290133317311606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,128,1,fp8,fp8,0,0.1790026624997457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,128,1,float16,fp8,0,0.19597333669662476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,128,1,fp8,fp8,0,0.1789919932683309
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,128,1,float16,fp8,0,0.19751467307408652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,128,1,float16,float16,0,0.20305599768956503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,128,1,fp8,fp8,0,0.17878933747609457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,128,1,float16,float16,0,0.20324800411860147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,128,1,float16,float16,0,0.2033066749572754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,128,1,float16,fp8,0,0.199072003364563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,128,1,fp8,fp8,0,0.1804906725883484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,128,1,float16,float16,0,6.202325185139974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,128,1,float16,fp8,0,6.075509389241536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,128,1,float16,fp8,0,5.973962783813477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,128,1,fp8,fp8,0,5.202725410461426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,128,1,float16,float16,0,6.110383987426758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,128,1,fp8,fp8,0,5.197525342305501
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,128,1,float16,float16,0,6.198528289794922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,128,1,float16,fp8,0,5.950063705444336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,128,1,float16,float16,0,3.010784149169922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,128,1,fp8,fp8,0,5.237146695454915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,128,1,float16,float16,0,6.284325281778972
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,128,1,float16,fp8,0,6.044741312662761
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,128,1,float16,fp8,0,2.921034812927246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,128,1,fp8,fp8,0,5.288026809692383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,128,1,float16,float16,0,3.2258453369140625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,128,1,fp8,fp8,0,2.596266587575277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,128,1,float16,fp8,0,3.1922613779703775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,128,1,fp8,fp8,0,2.897418657938639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,128,1,float16,fp8,0,2.929855982462565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,128,1,float16,float16,0,3.0248851776123047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,128,1,fp8,fp8,0,2.5950719515482583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,128,1,float16,float16,0,3.0304266611735025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,128,1,fp8,fp8,0,2.6155306498209634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,128,1,float16,fp8,0,2.9382187525431314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,128,1,float16,fp8,0,2.9661919275919595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,128,1,float16,float16,0,3.052074750264486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,128,1,float16,float16,0,1.5266505877176921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,128,1,float16,fp8,0,1.483781337738037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,128,1,fp8,fp8,0,2.6426666577657065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,128,1,float16,fp8,0,1.602783997853597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,128,1,fp8,fp8,0,1.3138079643249512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,128,1,float16,float16,0,1.644858678181966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,128,1,fp8,fp8,0,1.483359972635905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,128,1,float16,float16,0,1.531823952992757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,128,1,float16,fp8,0,1.490122636159261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,128,1,fp8,fp8,0,1.3148586750030518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,128,1,float16,float16,0,1.5348960558573406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,128,1,float16,fp8,0,1.4934132893880208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,128,1,float16,float16,0,1.5443520545959473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,128,1,fp8,fp8,0,1.3257066408793132
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,128,1,float16,fp8,0,1.5072852770487468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,128,1,fp8,fp8,0,1.3422293663024902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,128,1,float16,float16,0,0.7938079833984375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,128,1,float16,fp8,0,0.774949312210083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,128,1,float16,float16,0,0.8482560316721598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,128,1,fp8,fp8,0,0.688373327255249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,128,1,float16,fp8,0,0.776634693145752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,128,1,float16,float16,0,0.7982186476389567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,128,1,float16,fp8,0,0.831706682840983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,128,1,fp8,fp8,0,0.7721439997355143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,128,1,fp8,fp8,0,0.6882293224334717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,128,1,float16,float16,0,0.7992800076802572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,128,1,float16,fp8,0,0.7810719807942709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,128,1,fp8,fp8,0,0.6932266553243002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,128,1,float16,float16,0,0.8059893449147543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,128,1,float16,fp8,0,0.7848320007324219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,128,1,fp8,fp8,0,0.7005279858907064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,128,1,float16,float16,0,0.45079998175303143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,128,1,float16,float16,0,0.429637352625529
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,128,1,float16,fp8,0,0.4171573321024577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,128,1,float16,fp8,0,0.44598400592803955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,128,1,fp8,fp8,0,0.3739466667175293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,128,1,float16,float16,0,0.4306453466415405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,128,1,float16,fp8,0,0.41834131876627606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,128,1,fp8,fp8,0,0.4196053345998128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,128,1,fp8,fp8,0,0.3755360047022502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,128,1,float16,float16,0,0.43300267060597736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,128,1,float16,fp8,0,0.4203519821166992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,128,1,fp8,fp8,0,0.3766026496887207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,128,1,float16,float16,0,0.43517335255940753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,128,1,float16,fp8,0,0.4240479866663615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,128,1,fp8,fp8,0,0.379802664120992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,128,1,float16,float16,0,0.24393600225448608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,128,1,float16,float16,0,0.25677865743637085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,128,1,float16,fp8,0,0.23607999086380005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,128,1,fp8,fp8,0,0.2074506680170695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,128,1,float16,fp8,0,0.252346674601237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,128,1,fp8,fp8,0,0.23069866498311362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,128,1,float16,float16,0,0.24508267641067505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,128,1,fp8,fp8,0,0.207696000734965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,128,1,float16,fp8,0,0.23694399992624918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,128,1,float16,float16,0,0.24717867374420166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,128,1,float16,fp8,0,0.23917865753173828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,128,1,fp8,fp8,0,0.20827732483545938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,128,1,float16,float16,0,0.24794665972391763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,128,1,float16,fp8,0,0.2420639991760254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,128,1,fp8,fp8,0,0.2108373244603475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,128,1,float16,fp8,0,0.13794666528701782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,128,1,float16,float16,0,0.14814399679501852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,128,1,fp8,fp8,0,0.12707733114560446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,128,1,float16,fp8,0,0.14571733276049295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,128,1,float16,float16,0,0.14095466335614523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,128,1,fp8,fp8,0,0.13962133725484213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,128,1,float16,fp8,0,0.13795733451843262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,128,1,float16,float16,0,0.1395093301932017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,128,1,fp8,fp8,0,0.1274773379166921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,128,1,float16,float16,0,0.14061866203943887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,128,1,float16,fp8,0,0.1378666659196218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,128,1,float16,float16,0,0.14180800318717957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,128,1,float16,fp8,0,0.13784000277519226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,128,1,fp8,fp8,0,0.13156800468762717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,128,1,fp8,fp8,0,0.1285653313000997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,128,1,float16,float16,0,5.848837534586589
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,128,1,float16,fp8,0,5.924048105875651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,128,1,fp8,fp8,0,5.18394660949707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,128,1,float16,float16,0,5.980986913045247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,128,1,float16,fp8,0,6.153957366943359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,128,1,fp8,fp8,0,5.3350880940755205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,128,1,float16,float16,0,6.152085622151692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,128,1,float16,fp8,0,6.137061436971028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,128,1,float16,float16,0,2.926117261250814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,128,1,fp8,fp8,0,5.766821543375651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,128,1,fp8,fp8,0,5.822805404663086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,128,1,float16,fp8,0,6.333626429239909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,128,1,float16,float16,0,6.210063934326172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,128,1,float16,float16,0,3.4283679326375327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,128,1,float16,fp8,0,3.3445866902669272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,128,1,fp8,fp8,0,2.9924373626708984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,128,1,float16,fp8,0,2.9021120071411133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,128,1,fp8,fp8,0,2.593850612640381
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,128,1,float16,float16,0,2.9725173314412436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,128,1,float16,fp8,0,2.9482294718424478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,128,1,fp8,fp8,0,2.666874567667643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,128,1,float16,float16,0,3.0829013188680015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,128,1,float16,fp8,0,3.0839198430379233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,128,1,fp8,fp8,0,2.891087849934896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,128,1,float16,float16,0,1.4659466743469238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,128,1,float16,float16,0,3.083658536275228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,128,1,float16,fp8,0,3.1231199900309243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,128,1,float16,fp8,0,1.4618560473124187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,128,1,fp8,fp8,0,2.9265387852986655
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,128,1,float16,float16,0,1.7023040453592937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,128,1,fp8,fp8,0,1.3080213069915771
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,128,1,float16,float16,0,1.4820906321207683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,128,1,fp8,fp8,0,1.340677261352539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,128,1,float16,fp8,0,1.4765653610229492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,128,1,float16,float16,0,1.5429066022237141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,128,1,float16,fp8,0,1.6572960217793782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,128,1,fp8,fp8,0,1.4835467338562012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,128,1,float16,fp8,0,1.5364640553792317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,128,1,fp8,fp8,0,1.4619040489196777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,128,1,float16,float16,0,1.5450026194254558
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,128,1,float16,float16,0,0.7476053237915039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,128,1,float16,fp8,0,1.5709813435872395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,128,1,fp8,fp8,0,1.4694506327311199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,128,1,float16,fp8,0,0.7473493417104086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,128,1,float16,float16,0,0.857151985168457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,128,1,float16,fp8,0,0.8364480336507162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,128,1,fp8,fp8,0,0.6579999923706055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,128,1,float16,float16,0,0.754634698232015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,128,1,fp8,fp8,0,0.757263978322347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,128,1,float16,fp8,0,0.754309336344401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,128,1,fp8,fp8,0,0.6791946887969971
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,128,1,float16,float16,0,0.7774933179219564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,128,1,float16,fp8,0,0.7743146419525146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,128,1,fp8,fp8,0,0.741429328918457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,128,1,float16,float16,0,0.782368024190267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,128,1,float16,fp8,0,0.7892320156097412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,128,1,fp8,fp8,0,0.7484213511149088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,128,1,float16,float16,0,0.3888213237126668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,128,1,float16,float16,0,0.44090131918589276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,128,1,float16,fp8,0,0.38759998480478924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,128,1,fp8,fp8,0,0.3400213321050008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,128,1,float16,fp8,0,0.4313173294067383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,128,1,float16,float16,0,0.3907253344853719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,128,1,fp8,fp8,0,0.390341321627299
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,128,1,float16,fp8,0,0.39188798268636066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,128,1,fp8,fp8,0,0.35019199053446454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,128,1,float16,fp8,0,0.40165865421295166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,128,1,float16,float16,0,0.40201600392659503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,128,1,fp8,fp8,0,0.38099201520284015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,128,1,float16,fp8,0,0.40372268358866376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,128,1,float16,float16,0,0.20734934012095133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,128,1,fp8,fp8,0,0.38661332925160724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,128,1,float16,float16,0,0.23583465814590454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,128,1,float16,float16,0,0.4055519898732503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,128,1,float16,fp8,0,0.2302453319231669
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,128,1,float16,fp8,0,0.20889067649841309
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,128,1,fp8,fp8,0,0.20430399974187216
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,128,1,fp8,fp8,0,0.17588265736897787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,128,1,float16,float16,0,0.20916267236073813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,128,1,float16,fp8,0,0.20971733331680298
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,128,1,fp8,fp8,0,0.1781866749127706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,128,1,float16,float16,0,0.21499200661977133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,128,1,float16,fp8,0,0.2146880030632019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,128,1,fp8,fp8,0,0.19494932889938354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,128,1,float16,float16,0,0.21923200289408365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,128,1,float16,fp8,0,0.21594667434692383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,128,1,fp8,fp8,0,0.19728533426920572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,128,1,float16,float16,0,0.1111306647459666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,128,1,float16,float16,0,0.13169599572817484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,128,1,float16,fp8,0,0.11150933305422465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,128,1,float16,fp8,0,0.12844799955685934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,128,1,fp8,fp8,0,0.11095999677975972
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,128,1,fp8,fp8,0,0.095551997423172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,128,1,float16,float16,0,0.11111467083295186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,128,1,float16,fp8,0,0.11151466766993205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,128,1,fp8,fp8,0,0.09806933005650838
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,128,1,float16,float16,0,0.11426666378974915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,128,1,float16,fp8,0,0.11409599582354228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,128,1,float16,float16,0,0.11366400122642517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,128,1,fp8,fp8,0,0.1039573351542155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,128,1,float16,fp8,0,0.11382933457692464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,128,1,fp8,fp8,0,0.10593600074450175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,128,1,float16,float16,0,0.06181333462397257
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,128,1,float16,float16,0,0.07042133311430614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,128,1,float16,fp8,0,0.06941866874694824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,128,1,float16,fp8,0,0.06205333272616068
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,128,1,fp8,fp8,0,0.06630399823188782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,128,1,fp8,fp8,0,0.0563679983218511
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,128,1,float16,float16,0,0.062047998110453285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,128,1,float16,fp8,0,0.06225599845250448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,128,1,fp8,fp8,0,0.057328000664711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,128,1,float16,float16,0,0.06374933322270711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,128,1,float16,fp8,0,0.06340266764163971
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,128,1,fp8,fp8,0,0.060266668597857155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,128,1,float16,float16,0,0.0633493314186732
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,128,1,float16,fp8,0,0.06380266447861989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,128,1,fp8,fp8,0,0.06118933359781901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,1,128,1,float16,fp8,0,4.636159896850586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,1,128,1,fp8,fp8,0,4.372661272684733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,1,128,1,float16,float16,0,4.619045257568359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,2,128,1,float16,float16,0,4.694346745808919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,2,128,1,fp8,fp8,0,4.432671864827474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,2,128,1,float16,fp8,0,4.673151969909668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,4,128,1,float16,float16,0,4.887434641520183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,4,128,1,float16,fp8,0,4.889258702596028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,1,128,1,float16,float16,0,2.2929439544677734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,4,128,1,fp8,fp8,0,4.872373263041179
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,8,128,1,float16,float16,0,4.930560111999512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,8,128,1,float16,fp8,0,4.965397198994954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,48,8,128,1,fp8,fp8,0,4.920661290486653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,1,128,1,float16,fp8,0,2.261376063028971
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,48,128,1,float16,float16,0,2.773296038309733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,48,128,1,float16,fp8,0,2.653322696685791
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,48,128,1,fp8,fp8,0,2.5377440452575684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,1,128,1,fp8,fp8,0,2.1487785975138345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,2,128,1,float16,float16,0,2.3234453201293945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,2,128,1,float16,fp8,0,2.3162879943847656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,2,128,1,fp8,fp8,0,2.2150185902913413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,4,128,1,float16,float16,0,2.435434659322103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,4,128,1,float16,fp8,0,2.4181973139444985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,4,128,1,fp8,fp8,0,2.438053290049235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,8,128,1,float16,float16,0,2.4487627347310386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,1,128,1,float16,float16,0,1.140997330347697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,1,128,1,float16,fp8,0,1.1357066631317139
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,8,128,1,float16,fp8,0,2.4779946009318032
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,48,8,128,1,fp8,fp8,0,2.473690668741862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,48,128,1,float16,float16,0,1.3742292722066243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,48,128,1,float16,fp8,0,1.3325440088907878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,1,128,1,fp8,fp8,0,1.0793920358022053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,2,128,1,float16,float16,0,1.153450647989909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,48,128,1,fp8,fp8,0,1.2626720269521077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,2,128,1,fp8,fp8,0,1.120527982711792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,2,128,1,float16,fp8,0,1.1503307024637859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,4,128,1,float16,float16,0,1.2144532998402913
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,4,128,1,float16,fp8,0,1.2128000259399414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,4,128,1,fp8,fp8,0,1.223130702972412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,8,128,1,float16,float16,0,1.2247520287831624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,8,128,1,float16,fp8,0,1.2418346405029297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,48,8,128,1,fp8,fp8,0,1.2441706657409668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,1,128,1,float16,float16,0,0.5831946531931559
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,1,128,1,float16,fp8,0,0.5795413255691528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,48,128,1,float16,float16,0,0.6922880013783773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,48,128,1,float16,fp8,0,0.6732266743977865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,1,128,1,fp8,fp8,0,0.5461653470993042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,48,128,1,fp8,fp8,0,0.6385600169499716
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,2,128,1,float16,float16,0,0.5902933279673258
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,2,128,1,float16,fp8,0,0.5879679918289185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,2,128,1,fp8,fp8,0,0.5662453174591064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,4,128,1,float16,float16,0,0.6115306615829468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,4,128,1,float16,fp8,0,0.6099146604537964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,4,128,1,fp8,fp8,0,0.624944011370341
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,8,128,1,float16,float16,0,0.6164746681849161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,8,128,1,fp8,fp8,0,0.6301493247350057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,48,8,128,1,float16,fp8,0,0.6210879882176717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,1,128,1,float16,float16,0,0.30130134026209515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,1,128,1,float16,fp8,0,0.30052266518274945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,1,128,1,fp8,fp8,0,0.2810186743736267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,48,128,1,float16,float16,0,0.3593386809031169
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,48,128,1,float16,fp8,0,0.3505653142929077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,2,128,1,float16,float16,0,0.30399467547734577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,2,128,1,float16,fp8,0,0.30508265892664593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,48,128,1,fp8,fp8,0,0.32914666334788006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,2,128,1,fp8,fp8,0,0.28836800654729206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,4,128,1,float16,float16,0,0.31462399164835614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,4,128,1,float16,fp8,0,0.3155253330866496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,4,128,1,fp8,fp8,0,0.3225333293279012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,8,128,1,float16,float16,0,0.3195893367131551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,8,128,1,float16,fp8,0,0.31801066795984906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,48,8,128,1,fp8,fp8,0,0.32578132549921673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,48,128,1,float16,float16,0,0.19155200322469076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,48,128,1,float16,fp8,0,0.18710400660832724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,1,128,1,float16,float16,0,0.16359999775886536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,1,128,1,float16,fp8,0,0.16292267044385275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,1,128,1,fp8,fp8,0,0.1518239974975586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,2,128,1,float16,float16,0,0.16386133432388306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,48,128,1,fp8,fp8,0,0.17463467518488565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,2,128,1,float16,fp8,0,0.16310933232307434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,2,128,1,fp8,fp8,0,0.1516746679941813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,4,128,1,float16,float16,0,0.1678453286488851
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,4,128,1,float16,fp8,0,0.16963199774424234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,4,128,1,fp8,fp8,0,0.16644799709320068
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,8,128,1,float16,float16,0,0.1723466714223226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,8,128,1,float16,fp8,0,0.16963199774424234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,48,8,128,1,fp8,fp8,0,0.1697546641031901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,1,128,1,float16,float16,0,0.09105599919954936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,48,128,1,float16,float16,0,0.11002666751543681
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,48,128,1,float16,fp8,0,0.10789866248766582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,48,128,1,fp8,fp8,0,0.0953439970811208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,1,128,1,float16,fp8,0,0.08995200196901958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,2,128,1,float16,float16,0,0.09085333347320557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,1,128,1,fp8,fp8,0,0.08418666323026021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,2,128,1,float16,fp8,0,0.0909440020720164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,2,128,1,fp8,fp8,0,0.08496532837549846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,4,128,1,float16,float16,0,0.09385599692662557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,4,128,1,float16,fp8,0,0.09422933061917622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,4,128,1,fp8,fp8,0,0.09041066964467366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,8,128,1,float16,float16,0,0.09335999687512715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,8,128,1,float16,fp8,0,0.09318400422732036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,48,8,128,1,fp8,fp8,0,0.09268800417582194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,48,128,1,float16,float16,0,0.06043200194835663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,48,128,1,float16,fp8,0,0.059978668888409935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,1,128,1,float16,float16,0,0.05150933563709259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,1,128,1,float16,fp8,0,0.051813334226608276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,48,128,1,fp8,fp8,0,0.057301332553227745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,1,128,1,fp8,fp8,0,0.048357332746187844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,2,128,1,float16,float16,0,0.052111998200416565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,2,128,1,fp8,fp8,0,0.049327999353408813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,2,128,1,float16,fp8,0,0.05225066840648651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,4,128,1,float16,float16,0,0.053082664807637535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,4,128,1,float16,fp8,0,0.05374933282534281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,4,128,1,fp8,fp8,0,0.05357333521048228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,8,128,1,float16,float16,0,0.05339199801286062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,8,128,1,float16,fp8,0,0.053445334235827126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,48,8,128,1,fp8,fp8,0,0.05328000088532766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,48,128,1,float16,float16,0,0.037274666130542755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,48,128,1,float16,fp8,0,0.03753600021203359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,48,128,1,fp8,fp8,0,0.03698666642109553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,1,128,1,float16,float16,0,0.03331200033426285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,1,128,1,float16,fp8,0,0.03435199956099192
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,1,128,1,fp8,fp8,0,0.03292266776164373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,2,128,1,float16,float16,0,0.03380800038576126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,2,128,1,float16,fp8,0,0.03429866582155228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,2,128,1,fp8,fp8,0,0.03335466732581457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,4,128,1,float16,float16,0,0.03468266626199087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,4,128,1,float16,fp8,0,0.03479466587305069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,4,128,1,fp8,fp8,0,0.03472533325354258
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,8,128,1,float16,float16,0,0.03458133339881897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,8,128,1,float16,fp8,0,0.0348693331082662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,48,8,128,1,fp8,fp8,0,0.03512533257404963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,1,128,1,float16,fp8,0,1.9252373377482097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,1,128,1,float16,float16,0,1.9356266657511394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,1,128,1,fp8,fp8,0,1.917301336924235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,2,128,1,float16,float16,0,2.006122589111328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,2,128,1,fp8,fp8,0,1.9399627049763997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,2,128,1,float16,fp8,0,1.9961919784545898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,4,128,1,float16,fp8,0,2.1052799224853516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,4,128,1,float16,float16,0,2.1094719568888345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,1,128,1,float16,float16,0,0.9787200291951498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,4,128,1,fp8,fp8,0,2.1629172960917153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,8,128,1,float16,float16,0,2.10916805267334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,8,128,1,float16,fp8,0,2.144474665323893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,48,8,128,1,fp8,fp8,0,2.207482655843099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,48,128,1,float16,float16,0,1.2180480162302654
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,1,128,1,float16,fp8,0,0.97435196240743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,48,128,1,float16,fp8,0,1.1749866803487141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,1,128,1,fp8,fp8,0,0.9306506315867106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,48,128,1,fp8,fp8,0,1.1416640281677246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,2,128,1,float16,float16,0,0.9918453693389893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,2,128,1,float16,fp8,0,0.9856692949930826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,2,128,1,fp8,fp8,0,0.9742293357849121
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,4,128,1,float16,float16,0,1.0527413686116536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,4,128,1,fp8,fp8,0,1.0853280226389568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,4,128,1,float16,fp8,0,1.0451626777648926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,8,128,1,float16,float16,0,1.054154634475708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,8,128,1,float16,fp8,0,1.0773599942525227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,48,8,128,1,fp8,fp8,0,1.1057973702748616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,1,128,1,float16,float16,0,0.5003360112508138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,48,128,1,float16,float16,0,0.6117546558380127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,1,128,1,float16,fp8,0,0.4981600046157837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,48,128,1,float16,fp8,0,0.5960053205490112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,1,128,1,fp8,fp8,0,0.4726879994074504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,2,128,1,float16,float16,0,0.5055786768595377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,2,128,1,float16,fp8,0,0.5052640040715536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,2,128,1,fp8,fp8,0,0.4936533371607463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,4,128,1,float16,float16,0,0.5271146694819132
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,48,128,1,fp8,fp8,0,0.5766933361689249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,4,128,1,fp8,fp8,0,0.5556960105895996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,8,128,1,float16,float16,0,0.5298986832300822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,4,128,1,float16,fp8,0,0.5243626832962036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,8,128,1,float16,fp8,0,0.5363519986470541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,48,8,128,1,fp8,fp8,0,0.5625600020090739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,1,128,1,float16,float16,0,0.2592693368593852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,1,128,1,float16,fp8,0,0.2579039931297302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,48,128,1,float16,float16,0,0.31759466727574664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,48,128,1,float16,fp8,0,0.3081600069999695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,1,128,1,fp8,fp8,0,0.24591465791066489
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,48,128,1,fp8,fp8,0,0.29874666531880695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,2,128,1,float16,fp8,0,0.2620533307393392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,2,128,1,float16,float16,0,0.26233599583307904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,2,128,1,fp8,fp8,0,0.2552586595217387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,4,128,1,float16,float16,0,0.2717919945716858
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,4,128,1,float16,fp8,0,0.27180800835291546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,4,128,1,fp8,fp8,0,0.2870453397432963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,8,128,1,float16,fp8,0,0.2737226684888204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,8,128,1,float16,float16,0,0.2739306688308716
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,48,8,128,1,fp8,fp8,0,0.2919893264770508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,48,128,1,float16,float16,0,0.16993600130081177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,1,128,1,float16,float16,0,0.1393119990825653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,48,128,1,float16,fp8,0,0.16538133223851523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,1,128,1,float16,fp8,0,0.13994666934013367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,1,128,1,fp8,fp8,0,0.13397333025932312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,48,128,1,fp8,fp8,0,0.16080000003178915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,2,128,1,float16,fp8,0,0.14147733648618063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,2,128,1,float16,float16,0,0.14071466525395712
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,2,128,1,fp8,fp8,0,0.13461333513259888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,4,128,1,float16,float16,0,0.14520000418027243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,4,128,1,float16,fp8,0,0.14628799756368002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,4,128,1,fp8,fp8,0,0.151637335618337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,8,128,1,float16,float16,0,0.1479039986928304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,8,128,1,float16,fp8,0,0.14601600170135498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,48,8,128,1,fp8,fp8,0,0.1528426706790924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,1,128,1,float16,float16,0,0.0798880010843277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,1,128,1,float16,fp8,0,0.07974400122960408
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,48,128,1,float16,fp8,0,0.09739733735720317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,48,128,1,fp8,fp8,0,0.08853866656621297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,1,128,1,fp8,fp8,0,0.07502933343251546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,48,128,1,float16,float16,0,0.10081066687901814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,2,128,1,float16,float16,0,0.08050666749477386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,2,128,1,float16,fp8,0,0.08045866588751475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,2,128,1,fp8,fp8,0,0.07690133154392242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,4,128,1,float16,float16,0,0.08257600168387096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,4,128,1,fp8,fp8,0,0.0822026679913203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,4,128,1,float16,fp8,0,0.08285333216190338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,8,128,1,float16,float16,0,0.08344533046086629
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,8,128,1,fp8,fp8,0,0.0845973292986552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,48,8,128,1,float16,fp8,0,0.08311466872692108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,48,128,1,float16,float16,0,0.05563200016816457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,48,128,1,float16,fp8,0,0.05514666438102722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,1,128,1,float16,float16,0,0.04642133414745331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,48,128,1,fp8,fp8,0,0.05340266724427541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,1,128,1,float16,fp8,0,0.046725332736968994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,1,128,1,fp8,fp8,0,0.04353066782156626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,2,128,1,float16,float16,0,0.047040000557899475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,2,128,1,fp8,fp8,0,0.04394133388996124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,4,128,1,float16,float16,0,0.04836800197760264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,2,128,1,float16,fp8,0,0.04667200148105621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,4,128,1,float16,fp8,0,0.048570667703946434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,4,128,1,fp8,fp8,0,0.047295997540156044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,8,128,1,float16,float16,0,0.04839999973773956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,8,128,1,float16,fp8,0,0.048512001832326256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,48,8,128,1,fp8,fp8,0,0.04774933556715647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,48,128,1,float16,fp8,0,0.03532266616821289
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,48,128,1,float16,float16,0,0.035930665830771126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,1,128,1,float16,float16,0,0.032698666055997215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,1,128,1,float16,fp8,0,0.03250666707754135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,2,128,1,float16,float16,0,0.03294933338960012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,1,128,1,fp8,fp8,0,0.030453334252039593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,2,128,1,float16,fp8,0,0.03271999955177307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,48,128,1,fp8,fp8,0,0.033301333586374916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,2,128,1,fp8,fp8,0,0.030879999200503033
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,4,128,1,float16,float16,0,0.03327466547489166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,4,128,1,float16,fp8,0,0.03362133353948593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,4,128,1,fp8,fp8,0,0.03254399945338567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,8,128,1,float16,float16,0,0.03350399931271871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,8,128,1,float16,fp8,0,0.03378133227427801
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,48,8,128,1,fp8,fp8,0,0.03254399945338567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,48,128,1,float16,fp8,0,0.02611733227968216
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,48,128,1,float16,float16,0,0.026394667724768322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,48,128,1,fp8,fp8,0,0.024869332710901897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,1,128,1,float16,float16,0,0.02425066630045573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,1,128,1,float16,fp8,0,0.024714666108290356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,1,128,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,2,128,1,float16,float16,0,0.02446399877468745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,2,128,1,float16,fp8,0,0.024357333779335022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,4,128,1,float16,float16,0,0.025034666061401367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,4,128,1,fp8,fp8,0,0.023797333240509033
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,4,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,8,128,1,float16,float16,0,0.025061334172884624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,8,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,8,128,1,fp8,fp8,0,0.023792001108328503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,48,2,128,1,fp8,fp8,0,0.02294933299223582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,1,128,1,float16,float16,0,0.8892693519592285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,1,128,1,float16,fp8,0,0.8882719675699869
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,1,128,1,fp8,fp8,0,0.9465760389963785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,2,128,1,float16,fp8,0,0.9081546465555826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,2,128,1,float16,float16,0,0.9131680329640707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,2,128,1,fp8,fp8,0,0.9715572992960612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,4,128,1,float16,float16,0,0.9758453369140625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,4,128,1,float16,fp8,0,0.9607360363006592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,4,128,1,fp8,fp8,0,1.083743969599406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,1,128,1,float16,float16,0,0.45657066504160565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,8,128,1,float16,fp8,0,0.9794346491495768
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,8,128,1,fp8,fp8,0,1.100933313369751
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,48,8,128,1,float16,float16,0,0.9825387001037598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,48,128,1,float16,fp8,0,0.5716426769892374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,48,128,1,float16,float16,0,0.5952106714248657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,1,128,1,float16,fp8,0,0.45446932315826416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,48,128,1,fp8,fp8,0,0.572816014289856
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,1,128,1,fp8,fp8,0,0.4691679875055949
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,2,128,1,float16,float16,0,0.4681280056635539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,2,128,1,float16,fp8,0,0.46507732073465985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,2,128,1,fp8,fp8,0,0.49272533257802326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,4,128,1,float16,float16,0,0.49267733097076416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,4,128,1,float16,fp8,0,0.48500800132751465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,4,128,1,fp8,fp8,0,0.5525173346201578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,8,128,1,float16,float16,0,0.496613343556722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,8,128,1,float16,fp8,0,0.4910506804784139
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,1,128,1,float16,float16,0,0.23931199312210083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,48,8,128,1,fp8,fp8,0,0.5613866647084554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,1,128,1,float16,fp8,0,0.23799467086791992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,48,128,1,float16,float16,0,0.30843732754389447
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,1,128,1,fp8,fp8,0,0.24475200970967612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,48,128,1,float16,fp8,0,0.3091786702473958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,2,128,1,float16,float16,0,0.2439253330230713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,48,128,1,fp8,fp8,0,0.2993493278821309
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,2,128,1,float16,fp8,0,0.24415467182795206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,2,128,1,fp8,fp8,0,0.25170133511225384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,4,128,1,float16,float16,0,0.2542773286501567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,4,128,1,float16,fp8,0,0.2539573311805725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,4,128,1,fp8,fp8,0,0.2842133243878682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,8,128,1,float16,float16,0,0.25730667511622113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,8,128,1,float16,fp8,0,0.25286932786305744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,48,8,128,1,fp8,fp8,0,0.2898079951604207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,48,128,1,float16,float16,0,0.1734880010286967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,1,128,1,float16,float16,0,0.13129066427548727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,1,128,1,float16,fp8,0,0.13168000181516012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,1,128,1,fp8,fp8,0,0.13454932967821756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,48,128,1,float16,fp8,0,0.16325333714485168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,2,128,1,float16,float16,0,0.13452266653378805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,48,128,1,fp8,fp8,0,0.1612266699473063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,2,128,1,float16,fp8,0,0.13455999890963236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,2,128,1,fp8,fp8,0,0.1349066694577535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,4,128,1,float16,float16,0,0.13899733622868857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,4,128,1,float16,fp8,0,0.13909866412480673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,4,128,1,fp8,fp8,0,0.15127467115720114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,8,128,1,float16,float16,0,0.1405173341433207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,8,128,1,float16,fp8,0,0.1388800044854482
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,48,8,128,1,fp8,fp8,0,0.1556106706460317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,48,128,1,float16,float16,0,0.09525332848230998
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,1,128,1,float16,float16,0,0.07364800075689952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,48,128,1,float16,fp8,0,0.09127466877301534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,48,128,1,fp8,fp8,0,0.08921600381533305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,1,128,1,float16,fp8,0,0.07486400008201599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,1,128,1,fp8,fp8,0,0.0745119998852412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,2,128,1,float16,float16,0,0.07437333464622498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,2,128,1,float16,fp8,0,0.07450133562088013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,2,128,1,fp8,fp8,0,0.07502399881680806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,4,128,1,float16,float16,0,0.07715733349323273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,4,128,1,float16,fp8,0,0.07778133451938629
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,4,128,1,fp8,fp8,0,0.08167999982833862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,8,128,1,float16,float16,0,0.07851733267307281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,8,128,1,float16,fp8,0,0.07752533257007599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,48,8,128,1,fp8,fp8,0,0.08360000451405843
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,48,128,1,float16,float16,0,0.05372266471385956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,48,128,1,fp8,fp8,0,0.051594664653142296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,48,128,1,float16,fp8,0,0.052602668603261314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,1,128,1,float16,float16,0,0.04418666660785675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,1,128,1,float16,fp8,0,0.04381866753101349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,2,128,1,float16,float16,0,0.043807998299598694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,1,128,1,fp8,fp8,0,0.04297600189844767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,2,128,1,float16,fp8,0,0.04422399898370107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,2,128,1,fp8,fp8,0,0.04333333174387614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,4,128,1,float16,float16,0,0.04530133306980133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,4,128,1,float16,fp8,0,0.04582933088143667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,4,128,1,fp8,fp8,0,0.047295997540156044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,8,128,1,float16,float16,0,0.04538666705290476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,8,128,1,float16,fp8,0,0.04568000137805939
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,48,8,128,1,fp8,fp8,0,0.047877331574757896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,48,128,1,float16,float16,0,0.03333866596221924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,48,128,1,float16,fp8,0,0.03399466723203659
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,48,128,1,fp8,fp8,0,0.03409066547950109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,1,128,1,float16,float16,0,0.031157332162062328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,1,128,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,2,128,1,float16,float16,0,0.03134933362404505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,1,128,1,fp8,fp8,0,0.030389333764712017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,2,128,1,float16,fp8,0,0.03197866678237915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,2,128,1,fp8,fp8,0,0.030410667260487873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,4,128,1,float16,float16,0,0.03215466688076655
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,4,128,1,float16,fp8,0,0.03253333270549774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,4,128,1,fp8,fp8,0,0.03258133431275686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,8,128,1,float16,float16,0,0.03235200047492981
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,8,128,1,float16,fp8,0,0.03218133250872294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,48,8,128,1,fp8,fp8,0,0.03253333270549774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,48,128,1,float16,float16,0,0.02475200096766154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,48,128,1,fp8,fp8,0,0.024656000236670177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,1,128,1,float16,fp8,0,0.023631999890009563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,1,128,1,float16,float16,0,0.02292266736427943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,48,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,1,128,1,fp8,fp8,0,0.02292799949645996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,2,128,1,float16,float16,0,0.02382933348417282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,2,128,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,2,128,1,float16,fp8,0,0.02383466561635335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,4,128,1,float16,float16,0,0.024031999210516613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,4,128,1,float16,fp8,0,0.024288001159826916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,4,128,1,fp8,fp8,0,0.023946667710940044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,8,128,1,float16,fp8,0,0.02359466751416524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,8,128,1,fp8,fp8,0,0.024090667565663654
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,48,8,128,1,float16,float16,0,0.023605334262053173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,48,128,1,float16,float16,0,0.019695999721686046
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,48,128,1,float16,fp8,0,0.02048533285657565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,48,128,1,fp8,fp8,0,0.020469332734743755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,1,128,1,float16,float16,0,0.020010666300853092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,1,128,1,float16,fp8,0,0.02004266654451688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,1,128,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,2,128,1,float16,float16,0,0.019658666104078293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,2,128,1,float16,fp8,0,0.02019199977318446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,2,128,1,fp8,fp8,0,0.019637333850065868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,4,128,1,float16,float16,0,0.02006400004029274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,4,128,1,float16,fp8,0,0.02033599962790807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,4,128,1,fp8,fp8,0,0.02004266654451688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,8,128,1,float16,float16,0,0.01974933346112569
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,8,128,1,float16,fp8,0,0.02021866664290428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,48,8,128,1,fp8,fp8,0,0.019679999599854153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,1,128,1,fp8,fp8,0,0.6223359902699789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,1,128,1,float16,fp8,0,0.5762666861216227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,1,128,1,float16,float16,0,0.5730079809824625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,2,128,1,float16,float16,0,0.5833599964777628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,2,128,1,float16,fp8,0,0.5802613496780396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,2,128,1,fp8,fp8,0,0.6529653469721476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,4,128,1,float16,float16,0,0.6096160014470419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,4,128,1,float16,fp8,0,0.6093013286590576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,1,128,1,float16,float16,0,0.2949066758155823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,4,128,1,fp8,fp8,0,0.7183146476745605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,8,128,1,float16,float16,0,0.6155253251393636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,8,128,1,fp8,fp8,0,0.726207971572876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,48,8,128,1,float16,fp8,0,0.6168746550877889
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,48,128,1,fp8,fp8,0,0.37542935212453205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,48,128,1,float16,fp8,0,0.32683734099070233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,48,128,1,float16,float16,0,0.3364693323771159
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,1,128,1,float16,fp8,0,0.2951573332150777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,1,128,1,fp8,fp8,0,0.32521067063013714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,2,128,1,float16,float16,0,0.2998186747233073
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,2,128,1,float16,fp8,0,0.2982826630274455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,2,128,1,fp8,fp8,0,0.33104000488917035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,4,128,1,float16,float16,0,0.31227733691533405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,4,128,1,fp8,fp8,0,0.36722131570180255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,4,128,1,float16,fp8,0,0.3102026581764221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,8,128,1,float16,float16,0,0.3142720063527425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,8,128,1,float16,fp8,0,0.3109920024871826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,48,8,128,1,fp8,fp8,0,0.3727733294169108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,1,128,1,float16,float16,0,0.15832533439000449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,1,128,1,float16,fp8,0,0.1585813363393148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,1,128,1,fp8,fp8,0,0.1713493267695109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,48,128,1,float16,float16,0,0.1779786745707194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,48,128,1,float16,fp8,0,0.17385600010553995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,2,128,1,float16,float16,0,0.15963733196258545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,48,128,1,fp8,fp8,0,0.1953279972076416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,4,128,1,float16,float16,0,0.16622400283813477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,2,128,1,float16,fp8,0,0.15924800435702005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,4,128,1,float16,fp8,0,0.16577066977818808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,2,128,1,fp8,fp8,0,0.17617599169413248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,4,128,1,fp8,fp8,0,0.1909439961115519
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,8,128,1,float16,float16,0,0.16638400157292685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,8,128,1,float16,fp8,0,0.16646400094032288
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,48,8,128,1,fp8,fp8,0,0.19330666462580362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,1,128,1,float16,fp8,0,0.08718933661778767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,1,128,1,float16,float16,0,0.08719999591509502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,1,128,1,fp8,fp8,0,0.09501866499582927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,48,128,1,float16,float16,0,0.0974079966545105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,48,128,1,float16,fp8,0,0.09487467010815938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,2,128,1,float16,float16,0,0.08775466680526733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,48,128,1,fp8,fp8,0,0.10814932982126872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,2,128,1,float16,fp8,0,0.08732799688975017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,2,128,1,fp8,fp8,0,0.09633599718411763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,4,128,1,float16,float16,0,0.09056533376375835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,4,128,1,float16,fp8,0,0.09001599748929341
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,4,128,1,fp8,fp8,0,0.1009386678536733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,8,128,1,float16,float16,0,0.09123200178146362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,8,128,1,fp8,fp8,0,0.10355200370152791
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,48,8,128,1,float16,fp8,0,0.09105066458384196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,48,128,1,float16,float16,0,0.05629333357016245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,1,128,1,float16,float16,0,0.0499893327554067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,48,128,1,float16,fp8,0,0.054799998799959816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,1,128,1,fp8,fp8,0,0.05376000205675761
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,1,128,1,float16,fp8,0,0.05040533343950907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,48,128,1,fp8,fp8,0,0.06281066437562306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,2,128,1,float16,float16,0,0.05045866469542185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,2,128,1,float16,fp8,0,0.05114666620890299
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,2,128,1,fp8,fp8,0,0.05442666510740916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,4,128,1,float16,float16,0,0.05227200190226237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,4,128,1,float16,fp8,0,0.05246399839719137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,4,128,1,fp8,fp8,0,0.05780800183614095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,8,128,1,float16,float16,0,0.052245333790779114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,8,128,1,float16,fp8,0,0.05268266797065735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,48,8,128,1,fp8,fp8,0,0.05780800183614095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,48,128,1,float16,float16,0,0.03338133295377096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,48,128,1,float16,fp8,0,0.032773333291212715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,1,128,1,float16,float16,0,0.032245332996050514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,48,128,1,fp8,fp8,0,0.03661333272854487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,1,128,1,float16,fp8,0,0.032357332607110344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,1,128,1,fp8,fp8,0,0.033946665624777474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,2,128,1,float16,fp8,0,0.03234133372704188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,2,128,1,float16,float16,0,0.032831999162832894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,2,128,1,fp8,fp8,0,0.03443199892838796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,4,128,1,float16,float16,0,0.033439998825391136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,4,128,1,float16,fp8,0,0.033189333975315094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,4,128,1,fp8,fp8,0,0.03548266738653183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,8,128,1,float16,float16,0,0.03312533348798752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,8,128,1,float16,fp8,0,0.03347733368476232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,48,8,128,1,fp8,fp8,0,0.0359199990828832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,48,128,1,float16,float16,0,0.026869334280490875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,48,128,1,float16,fp8,0,0.026821332673231762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,48,128,1,fp8,fp8,0,0.028773332635561626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,1,128,1,float16,float16,0,0.025626666843891144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,1,128,1,float16,fp8,0,0.02548266698916753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,1,128,1,fp8,fp8,0,0.026608000199000042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,2,128,1,float16,float16,0,0.025573333104451496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,2,128,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,2,128,1,fp8,fp8,0,0.027104000250498455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,4,128,1,float16,float16,0,0.02603733291228612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,4,128,1,float16,fp8,0,0.026373334228992462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,4,128,1,fp8,fp8,0,0.028175999720891316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,8,128,1,float16,float16,0,0.02589866767326991
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,8,128,1,float16,fp8,0,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,48,8,128,1,fp8,fp8,0,0.02775466690460841
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,48,128,1,float16,float16,0,0.01907733331123988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,48,128,1,float16,fp8,0,0.019754666835069656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,48,128,1,fp8,fp8,0,0.02085866779088974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,1,128,1,float16,float16,0,0.018650667121013004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,1,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,1,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,2,128,1,float16,float16,0,0.018735999862353008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,2,128,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,2,128,1,fp8,fp8,0,0.019930666933457058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,4,128,1,float16,float16,0,0.01905599981546402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,4,128,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,4,128,1,fp8,fp8,0,0.019946667055288952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,8,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,8,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,48,128,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,48,8,128,1,fp8,fp8,0,0.020303999384244282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,48,128,1,float16,fp8,0,0.01844266677896182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,1,128,1,float16,float16,0,0.018207999567190807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,48,128,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,1,128,1,float16,fp8,0,0.018426666657129925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,2,128,1,float16,float16,0,0.017797333498795826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,1,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,2,128,1,float16,fp8,0,0.018496000518401463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,2,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,4,128,1,float16,float16,0,0.01794133335351944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,4,128,1,float16,fp8,0,0.018618666877349217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,8,128,1,float16,float16,0,0.018170667191346485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,8,128,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,4,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,48,8,128,1,fp8,fp8,0,0.019445333629846573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,1,128,1,float16,float16,0,0.39134399096171063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,1,128,1,fp8,fp8,0,0.4976266622543335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,2,128,1,float16,float16,0,0.39742398262023926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,1,128,1,float16,fp8,0,0.3913653294245402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,2,128,1,float16,fp8,0,0.3956799904505412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,4,128,1,float16,float16,0,0.4117973248163859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,4,128,1,float16,fp8,0,0.4127519925435384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,2,128,1,fp8,fp8,0,0.5077279806137085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,4,128,1,fp8,fp8,0,0.5432159900665283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,1,128,1,float16,float16,0,0.2031360069910685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,8,128,1,float16,float16,0,0.41628801822662354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,8,128,1,float16,fp8,0,0.41737067699432373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,48,8,128,1,fp8,fp8,0,0.5532053311665853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,48,128,1,float16,fp8,0,0.21923200289408365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,48,128,1,float16,float16,0,0.2246346672375997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,48,128,1,fp8,fp8,0,0.2851360042889913
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,1,128,1,float16,fp8,0,0.2037386695543925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,2,128,1,float16,float16,0,0.2062399983406067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,2,128,1,float16,fp8,0,0.2069279948870341
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,4,128,1,float16,float16,0,0.21454399824142456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,1,128,1,fp8,fp8,0,0.2609813412030538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,4,128,1,float16,fp8,0,0.21449067195256552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,4,128,1,fp8,fp8,0,0.27725332975387573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,2,128,1,fp8,fp8,0,0.2634773254394531
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,8,128,1,float16,float16,0,0.21733866135279337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,8,128,1,float16,fp8,0,0.21845867236455283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,48,8,128,1,fp8,fp8,0,0.2814720074335734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,1,128,1,float16,float16,0,0.10795733332633972
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,48,128,1,float16,float16,0,0.12105600039164226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,48,128,1,float16,fp8,0,0.11920533577601115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,1,128,1,fp8,fp8,0,0.14144532879193625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,48,128,1,fp8,fp8,0,0.15362133582433066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,1,128,1,float16,fp8,0,0.10876267155011494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,2,128,1,float16,float16,0,0.10841066638628642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,2,128,1,float16,fp8,0,0.10870400071144104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,2,128,1,fp8,fp8,0,0.14221333463986716
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,4,128,1,float16,float16,0,0.11100266377131145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,4,128,1,fp8,fp8,0,0.14799466729164124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,8,128,1,float16,float16,0,0.11171733339627583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,4,128,1,float16,fp8,0,0.11101866761843364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,8,128,1,fp8,fp8,0,0.15065600474675497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,48,8,128,1,float16,fp8,0,0.1126026709874471
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,48,128,1,float16,float16,0,0.0677706648906072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,48,128,1,float16,fp8,0,0.06716266771157582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,1,128,1,float16,float16,0,0.06043733159701029
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,1,128,1,float16,fp8,0,0.06080533564090729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,48,128,1,fp8,fp8,0,0.08602666854858398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,2,128,1,float16,float16,0,0.06081599990526835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,1,128,1,fp8,fp8,0,0.07689066727956136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,2,128,1,float16,fp8,0,0.061008001367251076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,2,128,1,fp8,fp8,0,0.07900799810886383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,4,128,1,float16,float16,0,0.06241600215435028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,4,128,1,float16,fp8,0,0.06205866734186808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,4,128,1,fp8,fp8,0,0.08175466458002727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,8,128,1,float16,float16,0,0.06289066871007283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,8,128,1,float16,fp8,0,0.06345066428184509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,48,128,1,float16,float16,0,0.038389332592487335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,48,128,1,float16,fp8,0,0.03865066667397817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,48,128,1,fp8,fp8,0,0.04865066707134247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,1,128,1,float16,float16,0,0.03762666632731756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,48,8,128,1,fp8,fp8,0,0.08197866876920064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,1,128,1,fp8,fp8,0,0.046853333711624146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,1,128,1,float16,fp8,0,0.037871999045213066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,2,128,1,float16,float16,0,0.03756266583998998
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,2,128,1,float16,fp8,0,0.03770133356253306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,2,128,1,fp8,fp8,0,0.04624533156553904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,4,128,1,float16,float16,0,0.03852800031503042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,4,128,1,float16,fp8,0,0.03889599939187368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,8,128,1,float16,float16,0,0.03881600002447764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,8,128,1,fp8,fp8,0,0.04800533254941305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,8,128,1,float16,fp8,0,0.03882666677236557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,48,4,128,1,fp8,fp8,0,0.04789866507053375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,48,128,1,fp8,fp8,0,0.03327466547489166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,1,128,1,float16,float16,0,0.026416001220544178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,48,128,1,float16,fp8,0,0.027957332630952198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,1,128,1,float16,fp8,0,0.026943999032179516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,1,128,1,fp8,fp8,0,0.030762667457262676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,48,128,1,float16,float16,0,0.02752000093460083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,2,128,1,float16,float16,0,0.026560001075267792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,2,128,1,fp8,fp8,0,0.031130666534105938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,4,128,1,float16,float16,0,0.02683199942111969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,8,128,1,float16,float16,0,0.026975999275843304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,4,128,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,4,128,1,fp8,fp8,0,0.03221333275238673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,2,128,1,float16,fp8,0,0.026341333985328674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,8,128,1,fp8,fp8,0,0.03188266605138779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,48,8,128,1,float16,fp8,0,0.02749866743882497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,48,128,1,float16,float16,0,0.02107200026512146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,48,128,1,fp8,fp8,0,0.024400000770886738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,48,128,1,float16,fp8,0,0.02146666745344798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,1,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,1,128,1,float16,fp8,0,0.021226666867733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,2,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,2,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,4,128,1,float16,float16,0,0.020879998803138733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,2,128,1,fp8,fp8,0,0.02370133250951767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,4,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,1,128,1,fp8,fp8,0,0.023669332265853882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,4,128,1,fp8,fp8,0,0.023904000719388325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,8,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,8,128,1,float16,fp8,0,0.021712000171343487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,48,128,1,float16,float16,0,0.017456000049908955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,48,8,128,1,fp8,fp8,0,0.02404800057411194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,48,128,1,float16,fp8,0,0.01815466706951459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,48,128,1,fp8,fp8,0,0.019674666225910187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,1,128,1,float16,fp8,0,0.017685333887736004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,1,128,1,fp8,fp8,0,0.019482667247454327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,1,128,1,float16,float16,0,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,2,128,1,float16,fp8,0,0.017818666994571686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,2,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,2,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,4,128,1,float16,float16,0,0.01748266691962878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,4,128,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,8,128,1,float16,float16,0,0.017573333034912746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,4,128,1,float16,fp8,0,0.01786133274435997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,8,128,1,fp8,fp8,0,0.019925333559513092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,48,128,1,float16,float16,0,0.01661866654952367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,1,128,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,48,128,1,fp8,fp8,0,0.018474667022625606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,1,128,1,float16,float16,0,0.01729600007335345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,48,8,128,1,float16,fp8,0,0.01806933308641116
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,1,128,1,fp8,fp8,0,0.01834133391578992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,48,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,2,128,1,float16,float16,0,0.01685333376129468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,2,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,2,128,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,4,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,4,128,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,8,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,8,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,8,128,1,fp8,fp8,0,0.018485333770513535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,48,4,128,1,fp8,fp8,0,0.018383999665578205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,1,128,1,float16,float16,0,0.33241067330042523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,1,128,1,float16,fp8,0,0.33290133873621625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,1,128,1,fp8,fp8,0,0.43929068247477215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,2,128,1,float16,float16,0,0.33640531698862713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,2,128,1,float16,fp8,0,0.3356906572977702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,4,128,1,float16,float16,0,0.341813325881958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,2,128,1,fp8,fp8,0,0.44040000438690186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,4,128,1,float16,fp8,0,0.34295467535654706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,4,128,1,fp8,fp8,0,0.4563680092493693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,8,128,1,float16,float16,0,0.34671465555826825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,8,128,1,float16,fp8,0,0.3463253180185954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,48,8,128,1,fp8,fp8,0,0.46058666706085205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,48,128,1,float16,float16,0,0.17481066783269247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,48,128,1,float16,fp8,0,0.17126933733622232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,1,128,1,fp8,fp8,0,0.23104000091552734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,1,128,1,float16,fp8,0,0.17207467555999756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,2,128,1,float16,float16,0,0.17193067073822021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,1,128,1,float16,float16,0,0.17243733008702597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,2,128,1,float16,fp8,0,0.17267199357350668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,2,128,1,fp8,fp8,0,0.23172799746195474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,4,128,1,float16,float16,0,0.17508800824483237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,48,128,1,fp8,fp8,0,0.24235733350118002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,4,128,1,float16,fp8,0,0.17588265736897787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,4,128,1,fp8,fp8,0,0.23700799544652304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,8,128,1,float16,float16,0,0.17694934209187826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,8,128,1,fp8,fp8,0,0.24091200033823648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,48,128,1,float16,float16,0,0.09460799892743428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,48,128,1,float16,fp8,0,0.09338666995366414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,48,8,128,1,float16,fp8,0,0.1771893302599589
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,1,128,1,float16,float16,0,0.09347200393676758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,48,128,1,fp8,fp8,0,0.13222400347391763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,1,128,1,float16,fp8,0,0.09356799721717834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,1,128,1,fp8,fp8,0,0.12432000041007996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,2,128,1,float16,float16,0,0.0934986670811971
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,2,128,1,float16,fp8,0,0.09365333120028178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,2,128,1,fp8,fp8,0,0.12545067071914673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,4,128,1,float16,fp8,0,0.09570667147636414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,4,128,1,float16,float16,0,0.09517866373062134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,8,128,1,float16,float16,0,0.09519466757774353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,4,128,1,fp8,fp8,0,0.12841066718101501
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,8,128,1,fp8,fp8,0,0.12946666280428568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,48,8,128,1,float16,fp8,0,0.09562666217486064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,48,128,1,float16,fp8,0,0.05202133456865946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,48,128,1,float16,float16,0,0.05236800014972687
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,48,128,1,fp8,fp8,0,0.07225066423416138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,1,128,1,float16,float16,0,0.05336533486843109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,1,128,1,float16,fp8,0,0.05422399938106537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,1,128,1,fp8,fp8,0,0.07004799942175548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,2,128,1,float16,float16,0,0.054005334774653115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,2,128,1,float16,fp8,0,0.05422399938106537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,2,128,1,fp8,fp8,0,0.07056533296902974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,4,128,1,float16,float16,0,0.05429333448410034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,4,128,1,float16,fp8,0,0.05580799778302511
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,8,128,1,float16,float16,0,0.05482666691144308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,4,128,1,fp8,fp8,0,0.07187733550866444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,8,128,1,fp8,fp8,0,0.07252266506354015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,48,8,128,1,float16,fp8,0,0.05484800040721893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,48,128,1,float16,float16,0,0.034688000877698265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,48,128,1,float16,fp8,0,0.03514666606982549
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,1,128,1,float16,float16,0,0.03608533243338267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,48,128,1,fp8,fp8,0,0.044581333796183266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,1,128,1,float16,fp8,0,0.03572800010442734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,2,128,1,float16,float16,0,0.03640533238649368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,1,128,1,fp8,fp8,0,0.04347200194994608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,4,128,1,float16,float16,0,0.03633599976698557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,2,128,1,fp8,fp8,0,0.04348266621430715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,2,128,1,float16,fp8,0,0.03589866558710734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,4,128,1,float16,fp8,0,0.03705599904060364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,4,128,1,fp8,fp8,0,0.04426133135954539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,8,128,1,float16,float16,0,0.036346666514873505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,8,128,1,float16,fp8,0,0.037087999284267426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,48,128,1,float16,float16,0,0.02372266600529353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,48,8,128,1,fp8,fp8,0,0.04404800136884054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,48,128,1,float16,fp8,0,0.02502399931351344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,1,128,1,float16,float16,0,0.02409599969784419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,1,128,1,float16,fp8,0,0.02422400067249934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,48,128,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,1,128,1,fp8,fp8,0,0.02864533414443334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,2,128,1,float16,float16,0,0.02409599969784419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,2,128,1,float16,fp8,0,0.023962666591008503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,4,128,1,float16,fp8,0,0.024256000916163128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,2,128,1,fp8,fp8,0,0.027984000742435455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,4,128,1,float16,float16,0,0.023984000086784363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,4,128,1,fp8,fp8,0,0.028304000695546467
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,8,128,1,float16,fp8,0,0.024549332757790882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,8,128,1,float16,float16,0,0.02405333270629247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,48,8,128,1,fp8,fp8,0,0.028421332438786823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,48,128,1,float16,float16,0,0.019946667055288952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,48,128,1,float16,fp8,0,0.02059200033545494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,1,128,1,float16,float16,0,0.01989866668979327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,1,128,1,float16,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,48,128,1,fp8,fp8,0,0.023567999402681988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,1,128,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,2,128,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,2,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,2,128,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,4,128,1,float16,float16,0,0.020736000190178554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,4,128,1,float16,fp8,0,0.020762667059898376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,4,128,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,8,128,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,8,128,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,48,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,48,8,128,1,float16,float16,0,0.020576000213623047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,48,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,48,128,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,1,128,1,float16,float16,0,0.016693333784739178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,1,128,1,fp8,fp8,0,0.0185759998857975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,2,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,2,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,4,128,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,4,128,1,float16,float16,0,0.016810666769742966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,2,128,1,fp8,fp8,0,0.018565333137909572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,4,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,8,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,8,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,48,8,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,48,128,1,float16,float16,0,0.01659199967980385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,48,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,1,128,1,float16,float16,0,0.016501333564519882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,1,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,48,128,1,fp8,fp8,0,0.01830400029818217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,1,128,1,fp8,fp8,0,0.018298666924238205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,2,128,1,float16,float16,0,0.016399999459584553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,2,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,2,128,1,fp8,fp8,0,0.01829333355029424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,4,128,1,float16,float16,0,0.016778666526079178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,4,128,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,4,128,1,fp8,fp8,0,0.018245333184798557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,8,128,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,8,128,1,float16,float16,0,0.01629866659641266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,48,8,128,1,fp8,fp8,0,0.0182239996890227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,128,1,float16,float16,0,41.89070383707682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,128,1,float16,fp8,0,41.50421396891276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,128,1,fp8,fp8,0,27.38172149658203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,128,1,fp8,fp8,0,27.516123453776043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,128,1,float16,fp8,0,41.287984212239586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,128,1,float16,fp8,0,42.11535390218099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,128,1,float16,float16,0,42.610249837239586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,128,1,float16,float16,0,41.65252176920573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,128,1,float16,float16,0,20.966463724772137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,128,1,fp8,fp8,0,27.693707784016926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,128,1,fp8,fp8,0,27.681124369303387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,128,1,float16,fp8,0,41.692708333333336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,128,1,float16,float16,0,20.6909917195638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,128,1,float16,fp8,0,20.65894953409831
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,128,1,float16,fp8,0,21.23566436767578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,128,1,float16,float16,0,41.668294270833336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,128,1,fp8,fp8,0,14.385108947753906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,128,1,fp8,fp8,0,13.96499252319336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,128,1,float16,float16,0,20.936687469482422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,128,1,fp8,fp8,0,13.837450663248697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,128,1,float16,fp8,0,21.001253763834637
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,128,1,float16,fp8,0,21.246623992919922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,128,1,float16,float16,0,21.099488576253254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,128,1,fp8,fp8,0,13.82601547241211
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,128,1,float16,float16,0,21.376063028971355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,128,1,float16,fp8,0,21.040667215983074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,128,1,float16,fp8,0,10.59070905049642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,128,1,float16,float16,0,10.663301467895508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,128,1,fp8,fp8,0,7.0189971923828125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,128,1,fp8,fp8,0,13.999183654785156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,128,1,float16,float16,0,10.612703959147135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,128,1,float16,float16,0,10.529749552408854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,128,1,fp8,fp8,0,7.123157501220703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,128,1,float16,fp8,0,10.535242716471354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,128,1,float16,fp8,0,10.511103947957357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,128,1,fp8,fp8,0,6.976261138916016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,128,1,float16,float16,0,10.805189768473307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,128,1,float16,fp8,0,10.623514811197916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,128,1,fp8,fp8,0,7.019941329956055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,128,1,float16,float16,0,5.154959996541341
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,128,1,float16,float16,0,10.52996826171875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,128,1,float16,fp8,0,5.371653238932292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,128,1,float16,fp8,0,10.708421071370443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,128,1,fp8,fp8,0,7.0333601633707685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,128,1,float16,float16,0,5.1949812571207685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,128,1,float16,fp8,0,5.254656155904134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,128,1,fp8,fp8,0,3.5625387827555337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,128,1,float16,float16,0,5.230655988057454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,128,1,float16,fp8,0,5.145615895589192
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,128,1,fp8,fp8,0,3.57802677154541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,128,1,float16,float16,0,5.351173400878906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,128,1,float16,fp8,0,5.373274485270183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,128,1,fp8,fp8,0,3.6970720291137695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,128,1,fp8,fp8,0,3.593088150024414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,128,1,float16,float16,0,5.2967573801676435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,128,1,float16,fp8,0,5.262485186258952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,128,1,fp8,fp8,0,3.580810546875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,128,1,float16,float16,0,24.417861938476562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,128,1,fp8,fp8,0,16.12272008260091
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,128,1,float16,fp8,0,24.042203267415363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,128,1,float16,float16,0,24.75804901123047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,128,1,fp8,fp8,0,16.261637369791668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,128,1,float16,fp8,0,24.054911295572918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,128,1,float16,float16,0,24.03076680501302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,128,1,float16,fp8,0,24.100687662760418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,128,1,float16,float16,0,12.230682373046875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,128,1,fp8,fp8,0,16.386207580566406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,128,1,float16,fp8,0,24.185867309570312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,128,1,fp8,fp8,0,16.596341451009113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,128,1,float16,float16,0,24.194529215494793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,128,1,float16,float16,0,12.185306549072266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,128,1,float16,fp8,0,12.144490559895834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,128,1,fp8,fp8,0,8.074581146240234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,128,1,fp8,fp8,0,8.27459716796875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,128,1,float16,fp8,0,12.260011037190756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,128,1,float16,float16,0,12.1734619140625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,128,1,float16,fp8,0,12.416144053141275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,128,1,float16,float16,0,12.092496236165365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,128,1,float16,fp8,0,12.184890747070312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,128,1,fp8,fp8,0,8.362933476765951
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,128,1,fp8,fp8,0,8.10096549987793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,128,1,float16,float16,0,12.156842549641928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,128,1,float16,fp8,0,12.238277435302734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,128,1,float16,float16,0,6.067973454793294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,128,1,float16,fp8,0,5.985237121582031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,128,1,fp8,fp8,0,8.112607955932617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,128,1,float16,float16,0,6.116687774658203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,128,1,fp8,fp8,0,4.022272109985352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,128,1,float16,float16,0,6.020469029744466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,128,1,fp8,fp8,0,4.10919984181722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,128,1,float16,fp8,0,6.0783945719401045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,128,1,float16,fp8,0,5.903247833251953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,128,1,fp8,fp8,0,4.079237302144368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,128,1,float16,float16,0,5.966826756795247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,128,1,float16,fp8,0,5.880346934000651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,128,1,fp8,fp8,0,4.033583958943685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,128,1,float16,float16,0,6.028160095214844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,128,1,fp8,fp8,0,4.177829424540202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,128,1,float16,fp8,0,6.0084584554036455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,128,1,float16,float16,0,3.0264692306518555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,128,1,float16,float16,0,3.1397759119669595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,128,1,float16,fp8,0,3.014298756917318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,128,1,float16,fp8,0,3.023242632548014
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,128,1,fp8,fp8,0,2.1905813217163086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,128,1,fp8,fp8,0,2.170032024383545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,128,1,float16,float16,0,3.123215993245443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,128,1,float16,fp8,0,3.0314133961995444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,128,1,fp8,fp8,0,2.1613866488138833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,128,1,float16,fp8,0,3.0255254109700522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,128,1,float16,float16,0,3.1172053019205728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,128,1,fp8,fp8,0,2.163632074991862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,128,1,float16,float16,0,3.0762240091959634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,128,1,float16,fp8,0,3.0758558909098306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,128,1,fp8,fp8,0,2.1760053634643555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,128,1,float16,float16,0,17.190282185872395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,128,1,fp8,fp8,0,11.60757827758789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,128,1,float16,fp8,0,16.912255605061848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,128,1,float16,float16,0,17.41551971435547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,128,1,float16,fp8,0,17.013626098632812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,128,1,fp8,fp8,0,11.551920572916666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,128,1,float16,float16,0,17.16587193806966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,128,1,float16,fp8,0,17.25750986735026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,128,1,float16,float16,0,8.739744186401367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,128,1,fp8,fp8,0,11.53323237101237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,128,1,float16,fp8,0,17.082005818684895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,128,1,float16,float16,0,17.24548848470052
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,128,1,fp8,fp8,0,11.917471567789713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,128,1,float16,fp8,0,8.697184244791666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,128,1,float16,fp8,0,8.60371208190918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,128,1,float16,float16,0,8.619338353474935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,128,1,fp8,fp8,0,5.734506607055664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,128,1,float16,float16,0,8.502607981363932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,128,1,fp8,fp8,0,5.990570704142253
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,128,1,float16,fp8,0,8.523242950439453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,128,1,fp8,fp8,0,5.885653177897136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,128,1,float16,float16,0,8.536762873331705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,128,1,fp8,fp8,0,5.708874384562175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,128,1,float16,fp8,0,8.637237548828125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,128,1,float16,float16,0,8.609957377115885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,128,1,float16,fp8,0,4.293274561564128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,128,1,float16,float16,0,4.143909454345703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,128,1,fp8,fp8,0,2.937509218851725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,128,1,float16,fp8,0,8.575589497884115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,128,1,fp8,fp8,0,5.9736372629801435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,128,1,float16,float16,0,4.148031870524089
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,128,1,float16,float16,0,4.201637268066406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,128,1,float16,fp8,0,4.15114148457845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,128,1,float16,fp8,0,4.339776039123535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,128,1,fp8,fp8,0,3.0017547607421875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,128,1,fp8,fp8,0,2.938938776652018
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,128,1,float16,float16,0,4.30507214864095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,128,1,float16,fp8,0,4.20363203684489
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,128,1,fp8,fp8,0,2.942805290222168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,128,1,float16,float16,0,4.28765328725179
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,128,1,float16,fp8,0,4.237733205159505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,128,1,float16,float16,0,2.217189311981201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,128,1,fp8,fp8,0,2.9557813008626304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,128,1,float16,float16,0,2.2321759859720864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,128,1,float16,fp8,0,2.192901293436686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,128,1,float16,fp8,0,2.1842239697774253
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,128,1,fp8,fp8,0,1.594528039296468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,128,1,float16,float16,0,2.2106080055236816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,128,1,float16,fp8,0,2.1974827448527017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,128,1,fp8,fp8,0,1.6170132954915364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,128,1,fp8,fp8,0,1.5962026913960774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,128,1,float16,float16,0,2.23743470509847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,128,1,float16,fp8,0,2.1923893292744956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,128,1,float16,float16,0,2.240117390950521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,128,1,fp8,fp8,0,1.5978506406148274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,128,1,float16,fp8,0,2.1907679239908853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,128,1,fp8,fp8,0,1.6055893898010254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,128,1,float16,float16,0,22.481829325358074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,128,1,fp8,fp8,0,15.775445302327475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,128,1,float16,fp8,0,22.304283142089844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,128,1,float16,float16,0,22.610089619954426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,128,1,float16,fp8,0,22.72491709391276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,128,1,float16,float16,0,22.50507100423177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,128,1,float16,fp8,0,22.43012237548828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,128,1,fp8,fp8,0,15.377488454182943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,128,1,float16,float16,0,11.26363754272461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,128,1,fp8,fp8,0,15.523296356201172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,128,1,float16,float16,0,23.074689229329426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,128,1,fp8,fp8,0,15.52938715616862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,128,1,float16,float16,0,11.550272623697916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,128,1,float16,fp8,0,11.208133697509766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,128,1,float16,fp8,0,22.54095458984375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,128,1,fp8,fp8,0,8.013439814249674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,128,1,float16,fp8,0,11.345019022623697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,128,1,fp8,fp8,0,7.699482599894206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,128,1,float16,float16,0,11.499056498209635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,128,1,float16,fp8,0,11.28280512491862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,128,1,fp8,fp8,0,7.800352096557617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,128,1,float16,float16,0,11.303488413492838
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,128,1,fp8,fp8,0,7.689690907796224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,128,1,float16,fp8,0,11.197371164957682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,128,1,float16,float16,0,11.315439860026041
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,128,1,float16,fp8,0,11.209669748942057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,128,1,float16,float16,0,5.547258377075195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,128,1,fp8,fp8,0,8.085973103841146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,128,1,float16,fp8,0,5.578362782796224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,128,1,fp8,fp8,0,3.8456106185913086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,128,1,float16,float16,0,5.5408477783203125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,128,1,float16,float16,0,5.504543940226237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,128,1,float16,fp8,0,5.623765309651692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,128,1,float16,fp8,0,5.5562082926432295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,128,1,fp8,fp8,0,3.8608373006184897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,128,1,float16,fp8,0,5.509877522786458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,128,1,float16,float16,0,5.6601918538411455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,128,1,fp8,fp8,0,3.965237299601237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,128,1,fp8,fp8,0,3.8347094853719077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,128,1,float16,float16,0,5.613168080647786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,128,1,float16,fp8,0,5.7005869547526045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,128,1,float16,float16,0,2.778719902038574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,128,1,float16,fp8,0,2.7453225453694663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,128,1,fp8,fp8,0,3.890501340230306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,128,1,fp8,fp8,0,2.011552015940348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,128,1,float16,float16,0,2.832474708557129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,128,1,float16,float16,0,2.812485376993815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,128,1,fp8,fp8,0,2.0095839500427246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,128,1,float16,fp8,0,2.762437184651693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,128,1,fp8,fp8,0,2.0712693532307944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,128,1,float16,fp8,0,2.7758239110310874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,128,1,float16,float16,0,2.8014294306437173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,128,1,float16,fp8,0,2.761653264363607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,128,1,fp8,fp8,0,2.017045338948568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,128,1,float16,float16,0,2.824101448059082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,128,1,float16,float16,0,1.5035680135091145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,128,1,float16,fp8,0,2.7591892878214517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,128,1,fp8,fp8,0,2.0273760159810386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,128,1,float16,fp8,0,1.495514710744222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,128,1,float16,float16,0,1.497968037923177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,128,1,fp8,fp8,0,1.1354666550954182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,128,1,fp8,fp8,0,1.1123253504435222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,128,1,float16,float16,0,1.5056853294372559
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,128,1,float16,fp8,0,1.4877759615580242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,128,1,float16,fp8,0,1.4827893575032551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,128,1,fp8,fp8,0,1.1054666837056477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,128,1,float16,float16,0,1.5192532539367676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,128,1,fp8,fp8,0,1.1131892999013264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,128,1,float16,fp8,0,1.486944039662679
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,128,1,fp8,fp8,0,1.1204640070597331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,128,1,float16,float16,0,1.5228373209635417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,128,1,float16,fp8,0,1.4910987218221028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,128,1,float16,fp8,0,13.136080423990885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,128,1,float16,float16,0,13.339237213134766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,128,1,fp8,fp8,0,9.476608276367188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,128,1,float16,float16,0,13.327370961507162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,128,1,fp8,fp8,0,9.297824223836264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,128,1,float16,fp8,0,13.192698160807291
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,128,1,float16,float16,0,13.50753657023112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,128,1,float16,fp8,0,13.300890604654947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,128,1,float16,float16,0,6.618053436279297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,128,1,fp8,fp8,0,9.378719965616861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,128,1,float16,fp8,0,13.574970245361328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,128,1,float16,float16,0,13.3744265238444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,128,1,fp8,fp8,0,9.231573104858398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,128,1,float16,fp8,0,6.553504308064778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,128,1,fp8,fp8,0,4.864357312520345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,128,1,fp8,fp8,0,4.581173261006673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,128,1,float16,fp8,0,6.448010762532552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,128,1,float16,float16,0,6.5127410888671875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,128,1,float16,float16,0,6.5994828542073565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,128,1,float16,fp8,0,6.582511901855469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,128,1,fp8,fp8,0,4.605807940165202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,128,1,float16,float16,0,6.544549306233724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,128,1,fp8,fp8,0,4.599562644958496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,128,1,float16,fp8,0,6.487279891967773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,128,1,float16,float16,0,6.661685307820638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,128,1,float16,fp8,0,6.459349314371745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,128,1,fp8,fp8,0,4.637141227722168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,128,1,float16,float16,0,3.2036587397257485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,128,1,float16,fp8,0,3.2058614095052085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,128,1,float16,float16,0,3.2697439193725586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,128,1,float16,fp8,0,3.265045483907064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,128,1,fp8,fp8,0,2.363807996114095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,128,1,fp8,fp8,0,2.476682662963867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,128,1,float16,float16,0,3.2046985626220703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,128,1,float16,fp8,0,3.2189865112304688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,128,1,fp8,fp8,0,2.361253261566162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,128,1,float16,float16,0,3.287856101989746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,128,1,float16,fp8,0,3.171717325846354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,128,1,fp8,fp8,0,2.370240052541097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,128,1,float16,fp8,0,3.195631980895996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,128,1,float16,float16,0,3.220394770304362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,128,1,float16,float16,0,1.6848853429158528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,128,1,float16,fp8,0,1.6669492721557617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,128,1,fp8,fp8,0,2.3854239781697593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,128,1,fp8,fp8,0,1.255306641260783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,128,1,float16,float16,0,1.7222240765889485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,128,1,float16,float16,0,1.6890932718912761
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,128,1,float16,fp8,0,1.6782666842142742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,128,1,fp8,fp8,0,1.310154676437378
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,128,1,fp8,fp8,0,1.2572746276855469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,128,1,float16,fp8,0,1.6629014015197754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,128,1,float16,float16,0,1.691365400950114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,128,1,float16,fp8,0,1.6693013509114583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,128,1,fp8,fp8,0,1.264031966527303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,128,1,float16,float16,0,1.6957227389017742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,128,1,float16,fp8,0,1.6715253194173176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,128,1,fp8,fp8,0,1.2695573170979817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,128,1,float16,float16,0,0.937450647354126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,128,1,float16,fp8,0,0.9206399917602539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,128,1,float16,float16,0,0.9317973454793295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,128,1,fp8,fp8,0,0.7049173514048258
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,128,1,float16,fp8,0,0.9205386638641357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,128,1,fp8,fp8,0,0.6758986314137777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,128,1,float16,fp8,0,0.9221226374308268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,128,1,float16,float16,0,0.9317119916280111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,128,1,fp8,fp8,0,0.6768853664398193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,128,1,float16,float16,0,0.9351519743601481
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,128,1,float16,fp8,0,0.9202026526133219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,128,1,fp8,fp8,0,0.6775360107421875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,128,1,float16,float16,0,0.9409440358479818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,128,1,float16,fp8,0,0.9225599765777588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,128,1,fp8,fp8,0,0.6832693417867025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,128,1,float16,fp8,0,12.709930419921875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,128,1,float16,float16,0,12.834693908691406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,128,1,fp8,fp8,0,9.302410761515299
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,128,1,float16,float16,0,12.92678960164388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,128,1,float16,fp8,0,12.802144368489584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,128,1,fp8,fp8,0,9.414581298828125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,128,1,float16,fp8,0,12.968971252441406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,128,1,float16,float16,0,13.030036926269531
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,128,1,float16,float16,0,6.202181498209636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,128,1,fp8,fp8,0,9.36845842997233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,128,1,float16,float16,0,13.026021321614584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,128,1,fp8,fp8,0,9.50423494974772
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,128,1,float16,fp8,0,12.907349904378256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,128,1,float16,fp8,0,6.314229329427083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,128,1,float16,fp8,0,6.546213150024414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,128,1,fp8,fp8,0,4.984986623128255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,128,1,float16,float16,0,6.58846918741862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,128,1,fp8,fp8,0,4.663546562194824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,128,1,float16,float16,0,6.334901173909505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,128,1,float16,fp8,0,6.347706476847331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,128,1,fp8,fp8,0,4.6677548090616865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,128,1,float16,fp8,0,6.446432113647461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,128,1,fp8,fp8,0,4.701493263244629
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,128,1,float16,float16,0,6.435408274332683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,128,1,float16,fp8,0,6.310496012369792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,128,1,float16,float16,0,6.378896077473958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,128,1,float16,float16,0,3.120965321858724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,128,1,float16,fp8,0,3.074650764465332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,128,1,fp8,fp8,0,4.72049077351888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,128,1,float16,float16,0,3.1904373168945312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,128,1,fp8,fp8,0,2.3624320030212402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,128,1,float16,fp8,0,3.2105493545532227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,128,1,float16,float16,0,3.1099840799967446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,128,1,float16,fp8,0,3.0916268030802407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,128,1,fp8,fp8,0,2.5292906761169434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,128,1,fp8,fp8,0,2.367157300313314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,128,1,float16,float16,0,3.1262613932291665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,128,1,float16,fp8,0,3.07747745513916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,128,1,fp8,fp8,0,2.3779679934183755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,128,1,float16,float16,0,3.111839930216471
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,128,1,float16,fp8,0,3.0690507888793945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,128,1,float16,float16,0,1.6042453447977703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,128,1,float16,fp8,0,1.58295472462972
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,128,1,fp8,fp8,0,2.4004319508870444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,128,1,float16,float16,0,1.6621707280476887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,128,1,fp8,fp8,0,1.2345226605733235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,128,1,float16,float16,0,1.60698668162028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,128,1,fp8,fp8,0,1.3232320149739583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,128,1,float16,fp8,0,1.6321600278218586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,128,1,float16,fp8,0,1.5814293225606282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,128,1,fp8,fp8,0,1.2398347059885662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,128,1,float16,float16,0,1.6205919583638508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,128,1,fp8,fp8,0,1.2444693247477214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,128,1,float16,fp8,0,1.5981119473775227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,128,1,float16,float16,0,1.6143040657043457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,128,1,float16,fp8,0,1.5923733711242676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,128,1,float16,float16,0,0.8625226815541586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,128,1,float16,float16,0,0.8765386740366617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,128,1,float16,fp8,0,0.8743893305460612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,128,1,fp8,fp8,0,0.6746346950531006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,128,1,fp8,fp8,0,0.7142666975657145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,128,1,fp8,fp8,0,1.2567466894785564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,128,1,float16,fp8,0,0.8508640130360922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,128,1,float16,float16,0,0.8638772964477539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,128,1,float16,fp8,0,0.849280039469401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,128,1,fp8,fp8,0,0.6790186564127604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,128,1,float16,float16,0,0.8688693046569824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,128,1,float16,fp8,0,0.8504906495412191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,128,1,fp8,fp8,0,0.6796000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,128,1,float16,float16,0,0.870533307393392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,128,1,float16,float16,0,0.49349331855773926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,128,1,float16,fp8,0,0.8580959637959799
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,128,1,float16,fp8,0,0.4849439859390259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,128,1,fp8,fp8,0,0.6806666851043701
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,128,1,float16,float16,0,0.49633065859476727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,128,1,float16,fp8,0,0.49218134085337323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,128,1,fp8,fp8,0,0.3954933484395345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,128,1,float16,float16,0,0.4949333270390828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,128,1,fp8,fp8,0,0.3736960093180339
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,128,1,fp8,fp8,0,0.3733439842859904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,128,1,float16,fp8,0,0.4875946839650472
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,128,1,float16,float16,0,0.4952053229014079
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,128,1,fp8,fp8,0,0.37547731399536133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,128,1,float16,float16,0,0.49746668338775635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,128,1,float16,fp8,0,0.486133337020874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,128,1,float16,fp8,0,0.48869868119557697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,128,1,fp8,fp8,0,0.37781866391499835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,128,1,float16,fp8,0,7.583642959594727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,128,1,fp8,fp8,0,5.896933237711589
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,128,1,float16,float16,0,7.969231923421224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,128,1,float16,fp8,0,7.723093032836914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,128,1,float16,float16,0,7.86077880859375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,128,1,fp8,fp8,0,5.9281972249348955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,128,1,float16,fp8,0,7.783290863037109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,128,1,float16,float16,0,7.8503678639729815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,128,1,float16,float16,0,3.7655998865763345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,128,1,fp8,fp8,0,5.9357865651448565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,128,1,float16,float16,0,7.780693054199219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,128,1,fp8,fp8,0,6.025733311971028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,128,1,float16,fp8,0,7.954416275024414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,128,1,float16,fp8,0,3.6836268107096353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,128,1,fp8,fp8,0,3.2148265838623047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,128,1,float16,float16,0,3.9104267756144204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,128,1,float16,fp8,0,3.9688854217529297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,128,1,fp8,fp8,0,2.970010757446289
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,128,1,float16,fp8,0,3.747626622517904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,128,1,fp8,fp8,0,2.9803520838419595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,128,1,float16,float16,0,3.8102614084879556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,128,1,float16,float16,0,3.7762505213419595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,128,1,float16,fp8,0,3.735637346903483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,128,1,fp8,fp8,0,2.991093317667643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,128,1,float16,float16,0,3.856138547261556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,128,1,float16,fp8,0,3.7237065633138022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,128,1,float16,float16,0,1.932901382446289
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,128,1,fp8,fp8,0,3.0221598943074546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,128,1,float16,float16,0,1.9964799880981445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,128,1,float16,fp8,0,1.8838240305582683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,128,1,float16,fp8,0,1.9665172894795735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,128,1,fp8,fp8,0,1.5210720698038738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,128,1,fp8,fp8,0,1.6497386296590169
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,128,1,float16,float16,0,1.921562671661377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,128,1,float16,fp8,0,1.8986186981201172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,128,1,fp8,fp8,0,1.5274933179219563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,128,1,float16,float16,0,1.9301600456237793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,128,1,fp8,fp8,0,1.530501365661621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,128,1,float16,fp8,0,1.8923840522766113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,128,1,float16,float16,0,1.952624003092448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,128,1,fp8,fp8,0,1.5420053799947102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,128,1,float16,fp8,0,1.9057119687398274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,128,1,float16,float16,0,1.005184014638265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,128,1,float16,float16,0,1.0475786526997883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,128,1,float16,fp8,0,0.9902880191802979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,128,1,float16,float16,0,1.008629322052002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,128,1,fp8,fp8,0,0.8081599871317545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,128,1,float16,fp8,0,0.989952007929484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,128,1,float16,fp8,0,1.0368586381276448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,128,1,fp8,fp8,0,0.8787360191345215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,128,1,fp8,fp8,0,0.8045492966969808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,128,1,float16,float16,0,1.0141653219858806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,128,1,fp8,fp8,0,0.8109172979990641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,128,1,float16,fp8,0,0.9951786994934082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,128,1,float16,fp8,0,1.0005919933319092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,128,1,float16,float16,0,1.021456003189087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,128,1,float16,float16,0,0.5657866795857748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,128,1,float16,float16,0,0.5522880156834921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,128,1,float16,fp8,0,0.5559360186258951
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,128,1,fp8,fp8,0,0.8208533128102621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,128,1,fp8,fp8,0,0.47144532203674316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,128,1,float16,fp8,0,0.5420586665471395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,128,1,fp8,fp8,0,0.43087466557820636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,128,1,float16,float16,0,0.5541866620381674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,128,1,fp8,fp8,0,0.43005867799123126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,128,1,float16,fp8,0,0.5445760091145834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,128,1,float16,float16,0,0.5547626813252767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,128,1,fp8,fp8,0,0.43324800332387287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,128,1,float16,fp8,0,0.5442026853561401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,128,1,float16,fp8,0,0.5461653470993042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,128,1,float16,float16,0,0.5583306550979614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,128,1,fp8,fp8,0,0.43805332978566486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,128,1,float16,float16,0,0.30136533578236896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,128,1,float16,float16,0,0.31193600098292035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,128,1,float16,fp8,0,0.3084320028622945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,128,1,fp8,fp8,0,0.2520693341890971
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,128,1,fp8,fp8,0,0.26741333802541095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,128,1,float16,fp8,0,0.29337066411972046
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,128,1,float16,float16,0,0.30135999123255414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,128,1,fp8,fp8,0,0.25277866919835407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,128,1,float16,float16,0,0.30139732360839844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,128,1,float16,fp8,0,0.29578133424123126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,128,1,float16,fp8,0,0.29613866408665973
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,128,1,fp8,fp8,0,0.253493328889211
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,128,1,float16,float16,0,0.30163733164469403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,128,1,float16,fp8,0,0.2961973349253337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,128,1,fp8,fp8,0,0.25524266560872394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,128,1,float16,fp8,0,7.935344060262044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,128,1,float16,float16,0,8.203280131022135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,128,1,fp8,fp8,0,6.54316775004069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,128,1,float16,float16,0,8.12939198811849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,128,1,float16,fp8,0,7.774309158325195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,128,1,float16,float16,0,8.187994639078775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,128,1,fp8,fp8,0,6.518655776977539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,128,1,float16,fp8,0,7.9468428293863935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,128,1,float16,float16,0,3.9493227005004883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,128,1,fp8,fp8,0,6.531642913818359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,128,1,float16,float16,0,8.360170364379883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,128,1,fp8,fp8,0,6.597898483276367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,128,1,float16,fp8,0,8.137237548828125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,128,1,float16,fp8,0,3.8114401499430337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,128,1,float16,float16,0,4.186469395955403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,128,1,float16,fp8,0,4.10863463083903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,128,1,fp8,fp8,0,3.2446187337239585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,128,1,fp8,fp8,0,3.5580320358276367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,128,1,float16,float16,0,4.018992106119792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,128,1,float16,fp8,0,3.8194506963094077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,128,1,fp8,fp8,0,3.260197321573893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,128,1,float16,float16,0,3.9688746134440103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,128,1,fp8,fp8,0,3.2778825759887695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,128,1,float16,fp8,0,3.8330507278442383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,128,1,float16,fp8,0,3.8603359858194985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,128,1,float16,float16,0,3.9951305389404297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,128,1,float16,float16,0,1.9693867365519206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,128,1,fp8,fp8,0,3.3134028116861978
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,128,1,float16,fp8,0,1.920357386271159
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,128,1,float16,float16,0,2.0827412605285645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,128,1,float16,float16,0,1.9691839218139648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,128,1,fp8,fp8,0,1.6307093302408855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,128,1,float16,fp8,0,2.079888025919596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,128,1,float16,fp8,0,1.9285386403401692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,128,1,fp8,fp8,0,1.6408586502075195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,128,1,fp8,fp8,0,1.810815970102946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,128,1,float16,float16,0,1.980778694152832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,128,1,float16,fp8,0,1.9332159360249836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,128,1,float16,float16,0,1.9899039268493652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,128,1,fp8,fp8,0,1.646015961964925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,128,1,float16,fp8,0,1.9622774124145508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,128,1,float16,float16,0,1.0199039777119954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,128,1,fp8,fp8,0,1.6693813006083171
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,128,1,float16,fp8,0,0.9957173665364584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,128,1,float16,float16,0,1.0786026318868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,128,1,fp8,fp8,0,0.8485493659973145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,128,1,float16,fp8,0,1.0582133134206135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,128,1,float16,float16,0,1.0249866644541423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,128,1,fp8,fp8,0,0.9437546730041504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,128,1,float16,fp8,0,0.997973362604777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,128,1,fp8,fp8,0,0.8546079794565836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,128,1,float16,float16,0,1.0228373209635417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,128,1,float16,fp8,0,1.003930648167928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,128,1,fp8,fp8,0,0.857157309850057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,128,1,float16,float16,0,1.030949354171753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,128,1,fp8,fp8,0,0.8691039880116781
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,128,1,float16,fp8,0,1.012991984685262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,128,1,float16,float16,0,0.5669386784235636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,128,1,float16,float16,0,0.5444960196812948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,128,1,float16,fp8,0,0.5637973149617513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,128,1,float16,fp8,0,0.5340906778971354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,128,1,fp8,fp8,0,0.4597546656926473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,128,1,fp8,fp8,0,0.5059146483739217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,128,1,float16,float16,0,0.5466613372166952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,128,1,float16,fp8,0,0.5374879837036133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,128,1,fp8,fp8,0,0.4611733357111613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,128,1,float16,float16,0,0.5496000051498413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,128,1,float16,fp8,0,0.5371520121892294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,128,1,fp8,fp8,0,0.4607093334197998
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,128,1,float16,float16,0,0.5525386730829874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,128,1,float16,float16,0,0.30797866980234784
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,128,1,float16,fp8,0,0.2990506688753764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,128,1,float16,fp8,0,0.5389920075734457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,128,1,fp8,fp8,0,0.4678986469904582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,128,1,float16,fp8,0,0.3134133418401082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,128,1,float16,float16,0,0.31876800457636517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,128,1,fp8,fp8,0,0.27614933252334595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,128,1,fp8,fp8,0,0.2507413427035014
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,128,1,float16,fp8,0,0.30049065748850506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,128,1,float16,float16,0,0.30867733558019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,128,1,fp8,fp8,0,0.25030932823816937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,128,1,float16,float16,0,0.31063999732335407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,128,1,float16,fp8,0,0.3016853332519531
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,128,1,fp8,fp8,0,0.25244800249735516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,128,1,float16,float16,0,0.31121599674224854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,128,1,float16,fp8,0,0.3043573300043742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,128,1,fp8,fp8,0,0.2545386751492818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,128,1,float16,float16,0,0.17060800393422446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,128,1,float16,fp8,0,0.1675893266995748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,128,1,float16,float16,0,0.1805386741956075
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,128,1,fp8,fp8,0,0.1495146652062734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,128,1,float16,fp8,0,0.17704532543818155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,128,1,fp8,fp8,0,0.16376533110936484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,128,1,float16,float16,0,0.17139200369517008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,128,1,float16,fp8,0,0.16695467631022134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,128,1,fp8,fp8,0,0.15080533425013223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,128,1,float16,float16,0,0.17243733008702597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,128,1,float16,fp8,0,0.16778133312861124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,128,1,fp8,fp8,0,0.15221333503723145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,128,1,float16,float16,0,0.17199466625849405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,128,1,float16,fp8,0,0.16804800430933634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,128,1,fp8,fp8,0,0.15461333592732748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,128,1,fp8,fp8,0,4.375626564025879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,128,1,float16,fp8,0,4.893306732177734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,128,1,float16,fp8,0,4.946325302124023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,128,1,float16,float16,0,5.153130531311035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,128,1,float16,float16,0,5.140090624491374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,128,1,fp8,fp8,0,4.3506825764973955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,128,1,float16,fp8,0,4.955557187398274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,128,1,float16,float16,0,5.174453417460124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,128,1,float16,float16,0,2.530458609263102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,128,1,fp8,fp8,0,4.397359848022461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,128,1,float16,float16,0,5.221391995747884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,128,1,float16,fp8,0,5.090693473815918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,128,1,fp8,fp8,0,4.424720128377278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,128,1,float16,fp8,0,2.449573357899984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,128,1,float16,fp8,0,2.6395252545674643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,128,1,float16,float16,0,2.719210624694824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,128,1,fp8,fp8,0,2.4279467264811196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,128,1,fp8,fp8,0,2.1772586504618325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,128,1,float16,float16,0,2.5338080724080405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,128,1,float16,fp8,0,2.4655253092447915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,128,1,fp8,fp8,0,2.177845319112142
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,128,1,float16,fp8,0,2.463557402292887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,128,1,float16,float16,0,2.543935934702555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,128,1,fp8,fp8,0,2.1894453366597495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,128,1,float16,float16,0,2.566197395324707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,128,1,float16,fp8,0,2.4999252955118814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,128,1,float16,float16,0,1.281226634979248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,128,1,fp8,fp8,0,2.218517303466797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,128,1,float16,fp8,0,1.246016025543213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,128,1,float16,float16,0,1.3783253033955891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,128,1,float16,fp8,0,1.3563253084818523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,128,1,fp8,fp8,0,1.1035412947336833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,128,1,float16,float16,0,1.2868373394012451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,128,1,float16,fp8,0,1.2502559820810955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,128,1,fp8,fp8,0,1.107653299967448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,128,1,fp8,fp8,0,1.236133337020874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,128,1,float16,float16,0,1.2875733375549316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,128,1,fp8,fp8,0,1.1152373154958088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,128,1,float16,fp8,0,1.2553919951121013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,128,1,float16,float16,0,1.297871987024943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,128,1,float16,fp8,0,1.2716106573740642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,128,1,float16,float16,0,0.6709067026774088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,128,1,float16,fp8,0,0.6520053148269653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,128,1,fp8,fp8,0,1.1294773419698079
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,128,1,fp8,fp8,0,0.5805066823959351
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,128,1,float16,float16,0,0.7098186810811361
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,128,1,float16,fp8,0,0.6984480222066244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,128,1,float16,float16,0,0.6733919779459635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,128,1,float16,fp8,0,0.6559199889500936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,128,1,fp8,fp8,0,0.5810773372650146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,128,1,fp8,fp8,0,0.6482880115509033
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,128,1,float16,float16,0,0.6739093462626139
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,128,1,float16,fp8,0,0.6573280096054077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,128,1,fp8,fp8,0,0.5859146515528361
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,128,1,float16,float16,0,0.677898645401001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,128,1,float16,fp8,0,0.6649866501490275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,128,1,fp8,fp8,0,0.5921333233515421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,128,1,float16,float16,0,0.36344532171885174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,128,1,float16,fp8,0,0.3551679849624634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,128,1,float16,float16,0,0.38280534744262695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,128,1,fp8,fp8,0,0.3529119888941447
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,128,1,float16,fp8,0,0.37835200627644855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,128,1,fp8,fp8,0,0.30724799633026123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,128,1,float16,float16,0,0.36671467622121173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,128,1,fp8,fp8,0,0.3088906606038411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,128,1,float16,fp8,0,0.35693331559499103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,128,1,float16,float16,0,0.3675626516342163
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,128,1,float16,fp8,0,0.3566720088322957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,128,1,fp8,fp8,0,0.3102186719576518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,128,1,float16,float16,0,0.3683520158131917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,128,1,float16,fp8,0,0.3599413235982259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,128,1,fp8,fp8,0,0.3157386581103007
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,128,1,float16,float16,0,0.19946134090423584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,128,1,float16,float16,0,0.21977599461873373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,128,1,float16,fp8,0,0.21742933988571167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,128,1,fp8,fp8,0,0.19501866896947226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,128,1,float16,fp8,0,0.19501332441965738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,128,1,fp8,fp8,0,0.17598400513331094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,128,1,float16,fp8,0,0.1965706745783488
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,128,1,float16,float16,0,0.2004959980646769
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,128,1,fp8,fp8,0,0.1767680048942566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,128,1,float16,float16,0,0.202239990234375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,128,1,float16,fp8,0,0.19708265860875449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,128,1,fp8,fp8,0,0.17854932943979898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,128,1,float16,float16,0,0.2034506599108378
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,128,1,float16,float16,0,0.11851200461387634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,128,1,fp8,fp8,0,0.18016000588734946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,128,1,float16,fp8,0,0.1163200040658315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,128,1,float16,fp8,0,0.19767999649047852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,128,1,float16,fp8,0,0.1229759951432546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,128,1,float16,float16,0,0.12427199880282085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,128,1,fp8,fp8,0,0.12081066767374675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,128,1,fp8,fp8,0,0.10821333527565002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,128,1,float16,float16,0,0.11794666449228923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,128,1,float16,fp8,0,0.11624000469843547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,128,1,fp8,fp8,0,0.1088266670703888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,128,1,float16,fp8,0,0.11617599924405415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,128,1,fp8,fp8,0,0.10865066448847453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,128,1,float16,float16,0,0.1184213360150655
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,128,1,float16,float16,0,0.1204213301340739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,128,1,float16,fp8,0,0.11714133620262146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,128,1,fp8,fp8,0,0.1125866671403249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,128,1,float16,fp8,0,4.884746551513672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,128,1,fp8,fp8,0,4.284005482991536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,128,1,float16,float16,0,5.007520039876302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,128,1,fp8,fp8,0,4.294447898864746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,128,1,float16,float16,0,4.961082776387532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,128,1,float16,fp8,0,4.999637285868327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,128,1,float16,fp8,0,4.916816075642903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,128,1,float16,float16,0,4.975712140401204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,128,1,float16,float16,0,2.393786589304606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,128,1,fp8,fp8,0,4.348106702168782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,128,1,float16,fp8,0,2.3962666193644204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,128,1,float16,float16,0,5.186245282491048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,128,1,float16,fp8,0,5.276927947998047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,128,1,fp8,fp8,0,4.411733309427897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,128,1,float16,float16,0,2.846485455830892
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,128,1,float16,fp8,0,2.802666664123535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,128,1,fp8,fp8,0,2.459493319193522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,128,1,fp8,fp8,0,2.130666732788086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,128,1,float16,float16,0,2.4041760762532554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,128,1,float16,fp8,0,2.4218506813049316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,128,1,fp8,fp8,0,2.154325326283773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,128,1,float16,float16,0,2.486149311065674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,128,1,float16,fp8,0,2.5041707356770835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,128,1,fp8,fp8,0,2.183226744333903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,128,1,float16,float16,0,1.2106506824493408
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,128,1,float16,fp8,0,2.52130126953125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,128,1,float16,float16,0,2.532485326131185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,128,1,float16,fp8,0,1.2141066392262776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,128,1,fp8,fp8,0,2.2058986028035483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,128,1,float16,float16,0,1.4333066940307617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,128,1,fp8,fp8,0,1.0509706338246663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,128,1,float16,fp8,0,1.3996319770812988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,128,1,float16,float16,0,1.2201173305511475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,128,1,fp8,fp8,0,1.236624002456665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,128,1,float16,fp8,0,1.219098647435506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,128,1,fp8,fp8,0,1.0842506885528564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,128,1,float16,float16,0,1.228437344233195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,128,1,float16,fp8,0,1.2323466936747234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,128,1,fp8,fp8,0,1.0945813655853271
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,128,1,float16,fp8,0,1.2604693571726482
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,128,1,float16,float16,0,0.6202773253122965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,128,1,fp8,fp8,0,1.1072959899902344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,128,1,float16,fp8,0,0.7004906336466471
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,128,1,float16,float16,0,1.2686293125152588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,128,1,float16,fp8,0,0.6201440095901489
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,128,1,float16,float16,0,0.7153653303782145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,128,1,fp8,fp8,0,0.6260000069936117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,128,1,fp8,fp8,0,0.53657066822052
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,128,1,float16,float16,0,0.6227039893468221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,128,1,float16,fp8,0,0.6239146788914999
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,128,1,fp8,fp8,0,0.539738655090332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,128,1,float16,fp8,0,0.627898653348287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,128,1,fp8,fp8,0,0.5496480067571005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,128,1,float16,float16,0,0.6310346523920695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,128,1,float16,float16,0,0.6394240061442057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,128,1,float16,fp8,0,0.6385226647059122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,128,1,float16,float16,0,0.32409600416819256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,128,1,fp8,fp8,0,0.5647146701812744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,128,1,float16,float16,0,0.3731679916381836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,128,1,float16,fp8,0,0.3645546833674113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,128,1,fp8,fp8,0,0.32505067189534503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,128,1,fp8,fp8,0,0.2816266616185506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,128,1,float16,fp8,0,0.3251733382542928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,128,1,float16,float16,0,0.32790933052698773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,128,1,float16,fp8,0,0.32784533500671387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,128,1,fp8,fp8,0,0.2832000056902568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,128,1,float16,float16,0,0.33018134037653607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,128,1,float16,fp8,0,0.3304746747016907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,128,1,fp8,fp8,0,0.2865973313649495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,128,1,float16,float16,0,0.3331413269042969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,128,1,fp8,fp8,0,0.29206933577855426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,128,1,float16,float16,0,0.20216000080108643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,128,1,float16,fp8,0,0.333840012550354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,128,1,float16,float16,0,0.1767786741256714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,128,1,float16,fp8,0,0.1763413349787394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,128,1,fp8,fp8,0,0.1710240046183268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,128,1,float16,fp8,0,0.19795199235280356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,128,1,fp8,fp8,0,0.1479573349157969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,128,1,float16,float16,0,0.17861332496007284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,128,1,float16,fp8,0,0.17836799224217734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,128,1,float16,float16,0,0.18018666903177896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,128,1,fp8,fp8,0,0.14847999811172485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,128,1,float16,fp8,0,0.17962666352589926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,128,1,float16,float16,0,0.18124266465504965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,128,1,float16,fp8,0,0.1821333368619283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,128,1,fp8,fp8,0,0.15293332934379578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,128,1,fp8,fp8,0,0.15052266915639242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,128,1,float16,float16,0,0.11004799604415894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,128,1,float16,float16,0,0.09473599990208943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,128,1,float16,fp8,0,0.10824533303578694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,128,1,fp8,fp8,0,0.08299200236797333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,128,1,float16,fp8,0,0.09500267108281453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,128,1,float16,float16,0,0.09539199868837993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,128,1,fp8,fp8,0,0.09570133686065674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,128,1,fp8,fp8,0,0.08391466736793518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,128,1,float16,fp8,0,0.09577066699663798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,128,1,float16,float16,0,0.0953546663125356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,128,1,fp8,fp8,0,0.08522133032480876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,128,1,float16,fp8,0,0.09629333019256592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,128,1,float16,fp8,0,0.0976639986038208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,128,1,float16,float16,0,0.09672000010808308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,128,1,fp8,fp8,0,0.08740267157554626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,128,1,float16,float16,0,0.06305066744486491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,128,1,float16,float16,0,0.05491200089454651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,128,1,fp8,fp8,0,0.05788800120353699
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,128,1,float16,fp8,0,0.056287998954455055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,128,1,fp8,fp8,0,0.05060799916585287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,128,1,float16,float16,0,0.055530667304992676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,128,1,fp8,fp8,0,0.05032533407211304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,128,1,float16,fp8,0,0.061466669042905174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,128,1,float16,fp8,0,0.055904000997543335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,128,1,float16,float16,0,0.05564799904823303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,128,1,float16,fp8,0,0.05644799768924713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,128,1,fp8,fp8,0,0.051872000098228455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,128,1,float16,float16,0,0.05690666536490122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,128,1,fp8,fp8,0,0.053802669048309326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,128,1,float16,fp8,0,0.056645333766937256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,2,128,1,float16,fp8,0,3.835162798563639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,2,128,1,fp8,fp8,0,3.5459518432617188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,2,128,1,float16,float16,0,3.827232042948405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,1,128,1,fp8,fp8,0,3.5196959177652993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,4,128,1,float16,float16,0,3.8940693537394204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,4,128,1,float16,fp8,0,3.890618642171224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,1,128,1,float16,fp8,0,3.798053423563639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,1,128,1,float16,float16,0,3.8175252278645835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,1,128,1,float16,float16,0,1.8594773610432942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,4,128,1,fp8,fp8,0,3.619647979736328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,1,128,1,float16,fp8,0,1.8540852864583333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,8,128,1,float16,float16,0,3.9869651794433594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,8,128,1,float16,fp8,0,3.9662294387817383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,40,8,128,1,fp8,fp8,0,3.6679840087890625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,40,128,1,float16,fp8,0,2.2730239232381186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,1,128,1,fp8,fp8,0,1.7597227096557617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,40,128,1,float16,float16,0,2.3337759971618652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,40,128,1,fp8,fp8,0,2.1061760584513345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,2,128,1,float16,float16,0,1.866645336151123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,2,128,1,float16,fp8,0,1.8981173833211262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,2,128,1,fp8,fp8,0,1.7683146794637044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,4,128,1,float16,float16,0,1.9457759857177734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,4,128,1,float16,fp8,0,1.9577333132425945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,4,128,1,fp8,fp8,0,1.8006827036539714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,1,128,1,float16,float16,0,0.9442453384399414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,8,128,1,float16,float16,0,1.9975733757019043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,8,128,1,fp8,fp8,0,1.830074628194173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,40,8,128,1,float16,fp8,0,1.9880053202311199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,1,128,1,float16,fp8,0,0.9406186739603678
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,40,128,1,float16,float16,0,1.1583946545918782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,40,128,1,float16,fp8,0,1.1200213432312012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,1,128,1,fp8,fp8,0,0.8621973196665446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,2,128,1,float16,float16,0,0.9448533058166504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,40,128,1,fp8,fp8,0,1.0541386604309082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,2,128,1,float16,fp8,0,0.9462506771087646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,2,128,1,fp8,fp8,0,0.8944106896718343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,4,128,1,float16,float16,0,0.9587519963582357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,4,128,1,float16,fp8,0,0.9594026406606039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,4,128,1,fp8,fp8,0,0.9051253000895182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,8,128,1,float16,float16,0,0.9967947006225586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,1,128,1,float16,float16,0,0.48165865739186603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,8,128,1,fp8,fp8,0,0.9162879784901937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,1,128,1,float16,fp8,0,0.4803573290506999
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,40,8,128,1,float16,fp8,0,0.9955306847890218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,40,128,1,float16,float16,0,0.580730676651001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,1,128,1,fp8,fp8,0,0.4411413272221883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,40,128,1,float16,fp8,0,0.5654400189717611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,2,128,1,float16,float16,0,0.4848959843317668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,40,128,1,fp8,fp8,0,0.5306986570358276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,2,128,1,fp8,fp8,0,0.44441600640614826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,2,128,1,float16,fp8,0,0.4838666518529256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,4,128,1,float16,float16,0,0.4885066747665405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,4,128,1,float16,fp8,0,0.48922133445739746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,4,128,1,fp8,fp8,0,0.45364801088968915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,1,128,1,float16,float16,0,0.25148266553878784
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,8,128,1,float16,float16,0,0.4980479876200358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,8,128,1,fp8,fp8,0,0.46458133061726886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,40,8,128,1,float16,fp8,0,0.4973333279291789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,40,128,1,float16,float16,0,0.3035413424173991
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,40,128,1,float16,fp8,0,0.29468266169230145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,1,128,1,float16,fp8,0,0.252837340037028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,40,128,1,fp8,fp8,0,0.2759893337885539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,1,128,1,fp8,fp8,0,0.23175466060638428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,2,128,1,float16,fp8,0,0.255621333916982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,2,128,1,fp8,fp8,0,0.23405865828196207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,2,128,1,float16,float16,0,0.2541653315226237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,4,128,1,float16,float16,0,0.2567039926846822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,4,128,1,float16,fp8,0,0.2584480047225952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,4,128,1,fp8,fp8,0,0.2380853295326233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,8,128,1,float16,fp8,0,0.2610879937807719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,8,128,1,float16,float16,0,0.2608533302942912
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,1,128,1,float16,float16,0,0.13757333159446716
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,40,8,128,1,fp8,fp8,0,0.24097599585851034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,40,128,1,float16,float16,0,0.16550399859746298
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,40,128,1,float16,fp8,0,0.16004266341527304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,40,128,1,fp8,fp8,0,0.14829867084821066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,1,128,1,fp8,fp8,0,0.1274186670780182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,2,128,1,float16,float16,0,0.13847999771436056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,1,128,1,float16,fp8,0,0.1374559998512268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,2,128,1,float16,fp8,0,0.13863466183344522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,2,128,1,fp8,fp8,0,0.12776000301043192
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,4,128,1,float16,float16,0,0.14086932937304178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,4,128,1,float16,fp8,0,0.14114133516947427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,4,128,1,fp8,fp8,0,0.1299253304799398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,8,128,1,float16,float16,0,0.1422719955444336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,8,128,1,float16,fp8,0,0.1420799990495046
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,40,8,128,1,fp8,fp8,0,0.13202133774757385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,40,128,1,float16,float16,0,0.0965173343817393
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,40,128,1,float16,fp8,0,0.09426666299502055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,40,128,1,fp8,fp8,0,0.08499200145403545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,1,128,1,float16,float16,0,0.07799999912579854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,1,128,1,float16,fp8,0,0.07820266485214233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,2,128,1,float16,float16,0,0.0787306676308314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,1,128,1,fp8,fp8,0,0.072543998559316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,2,128,1,float16,fp8,0,0.07952000200748444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,2,128,1,fp8,fp8,0,0.07330666482448578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,4,128,1,float16,float16,0,0.07906666894753774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,4,128,1,float16,fp8,0,0.07938666641712189
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,4,128,1,fp8,fp8,0,0.07473599910736084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,8,128,1,float16,fp8,0,0.08017066617806752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,8,128,1,float16,float16,0,0.07985066870848338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,40,8,128,1,fp8,fp8,0,0.07550933460394542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,40,128,1,float16,fp8,0,0.053077335158983864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,40,128,1,fp8,fp8,0,0.05045866469542185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,40,128,1,float16,float16,0,0.053957333167394005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,1,128,1,float16,float16,0,0.04650666813055674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,1,128,1,float16,fp8,0,0.04695466657479604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,1,128,1,fp8,fp8,0,0.0429013321797053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,2,128,1,float16,float16,0,0.04619733492533366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,2,128,1,float16,fp8,0,0.04629333317279816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,2,128,1,fp8,fp8,0,0.04297066728274027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,4,128,1,float16,float16,0,0.047237331668535866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,4,128,1,fp8,fp8,0,0.04529066880544027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,4,128,1,float16,fp8,0,0.047781333327293396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,8,128,1,float16,float16,0,0.04805333415667216
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,8,128,1,float16,fp8,0,0.0483893354733785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,40,8,128,1,fp8,fp8,0,0.046757335464159645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,40,128,1,float16,float16,0,0.03449599941571554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,40,128,1,float16,fp8,0,0.03418133407831192
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,40,128,1,fp8,fp8,0,0.03271999955177307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,1,128,1,float16,float16,0,0.031136001149813335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,1,128,1,fp8,fp8,0,0.029839999973773956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,1,128,1,float16,fp8,0,0.03138133386770884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,2,128,1,float16,float16,0,0.03162666658560435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,2,128,1,float16,fp8,0,0.031701333820819855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,2,128,1,fp8,fp8,0,0.02976000060637792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,4,128,1,float16,float16,0,0.030896000564098358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,4,128,1,float16,fp8,0,0.03230399886767069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,4,128,1,fp8,fp8,0,0.03068266560633977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,8,128,1,float16,float16,0,0.03213333338499069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,8,128,1,fp8,fp8,0,0.03268799930810928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,40,8,128,1,float16,fp8,0,0.03271466741959254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,1,128,1,float16,float16,0,1.5797173182169597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,1,128,1,float16,fp8,0,1.5814933776855469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,1,128,1,fp8,fp8,0,1.5220746994018555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,2,128,1,float16,float16,0,1.5919040044148762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,2,128,1,float16,fp8,0,1.6036373774210613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,2,128,1,fp8,fp8,0,1.531984011332194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,4,128,1,float16,float16,0,1.6549919446309407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,4,128,1,float16,fp8,0,1.6723252932230632
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,1,128,1,float16,float16,0,0.8019146919250488
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,4,128,1,fp8,fp8,0,1.5644267400105794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,8,128,1,float16,float16,0,1.7058720588684082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,8,128,1,float16,fp8,0,1.70142396291097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,40,8,128,1,fp8,fp8,0,1.6036853790283203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,1,128,1,float16,fp8,0,0.8039039770762125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,40,128,1,float16,float16,0,1.0204479694366455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,1,128,1,fp8,fp8,0,0.7474933465321859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,40,128,1,fp8,fp8,0,0.9415626525878906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,40,128,1,float16,fp8,0,0.9806239604949951
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,2,128,1,float16,float16,0,0.8049759864807129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,2,128,1,fp8,fp8,0,0.7733493645985922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,2,128,1,float16,fp8,0,0.8073493639628092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,4,128,1,float16,fp8,0,0.8184053103129069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,4,128,1,float16,float16,0,0.8205920060475668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,4,128,1,fp8,fp8,0,0.7904640038808187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,8,128,1,float16,float16,0,0.8499306837717692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,8,128,1,float16,fp8,0,0.8507200082143148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,1,128,1,float16,float16,0,0.4112853209177653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,40,8,128,1,fp8,fp8,0,0.8022666772206625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,40,128,1,float16,float16,0,0.5130186478296915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,1,128,1,float16,fp8,0,0.4105120102564494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,40,128,1,float16,fp8,0,0.49557868639628094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,1,128,1,fp8,fp8,0,0.3833386500676473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,40,128,1,fp8,fp8,0,0.4794986645380656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,2,128,1,float16,float16,0,0.4145973523457845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,2,128,1,float16,fp8,0,0.4135146538416545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,2,128,1,fp8,fp8,0,0.3879679838816325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,4,128,1,float16,float16,0,0.4174400170644124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,4,128,1,float16,fp8,0,0.4175573190053304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,4,128,1,fp8,fp8,0,0.3959519863128662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,8,128,1,float16,float16,0,0.425376017888387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,8,128,1,float16,fp8,0,0.4261173407236735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,1,128,1,float16,float16,0,0.21461333831151327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,1,128,1,float16,fp8,0,0.2153866688410441
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,40,8,128,1,fp8,fp8,0,0.4094613393147786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,1,128,1,fp8,fp8,0,0.20339200894037882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,40,128,1,float16,float16,0,0.26823999484380084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,40,128,1,float16,fp8,0,0.2598666747411092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,40,128,1,fp8,fp8,0,0.25013333559036255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,2,128,1,float16,float16,0,0.217631995677948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,2,128,1,fp8,fp8,0,0.20450133085250854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,2,128,1,float16,fp8,0,0.21676266193389893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,4,128,1,float16,fp8,0,0.22102399667104086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,8,128,1,float16,float16,0,0.22318933407465616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,4,128,1,fp8,fp8,0,0.20833067099253336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,4,128,1,float16,float16,0,0.2207733392715454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,8,128,1,fp8,fp8,0,0.21391467253367105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,40,8,128,1,float16,fp8,0,0.22376000881195068
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,40,128,1,float16,float16,0,0.1454026699066162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,40,128,1,float16,fp8,0,0.14200533429781595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,1,128,1,float16,float16,0,0.11875733733177185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,1,128,1,float16,fp8,0,0.11892267068227132
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,40,128,1,fp8,fp8,0,0.13644267121950784
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,1,128,1,fp8,fp8,0,0.11400533715883891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,2,128,1,float16,float16,0,0.11966933806737264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,2,128,1,float16,fp8,0,0.11980799833933513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,4,128,1,float16,float16,0,0.12116799751917522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,2,128,1,fp8,fp8,0,0.1144426663716634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,4,128,1,float16,fp8,0,0.12100799878438313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,8,128,1,float16,float16,0,0.12297067046165466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,8,128,1,float16,fp8,0,0.12261866529782613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,4,128,1,fp8,fp8,0,0.11619200309117635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,40,8,128,1,fp8,fp8,0,0.11906133095423381
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,40,128,1,float16,float16,0,0.08981333176294963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,40,128,1,float16,fp8,0,0.08741333087285359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,1,128,1,float16,float16,0,0.06924266616503398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,1,128,1,float16,fp8,0,0.06987200180689494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,40,128,1,fp8,fp8,0,0.07894933223724365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,1,128,1,fp8,fp8,0,0.06346666812896729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,2,128,1,float16,fp8,0,0.0705386648575465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,2,128,1,float16,float16,0,0.06952000161012013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,4,128,1,float16,float16,0,0.06994133194287618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,2,128,1,fp8,fp8,0,0.06484266618887584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,4,128,1,float16,fp8,0,0.07106666763623555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,4,128,1,fp8,fp8,0,0.06625066697597504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,8,128,1,fp8,fp8,0,0.06885333359241486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,8,128,1,float16,fp8,0,0.07137066622575124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,40,8,128,1,float16,float16,0,0.07115200161933899
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,40,128,1,fp8,fp8,0,0.04751466711362203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,40,128,1,float16,float16,0,0.04980266590913137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,1,128,1,float16,float16,0,0.0421973317861557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,40,128,1,float16,fp8,0,0.04896000027656555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,1,128,1,float16,fp8,0,0.04276266694068909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,1,128,1,fp8,fp8,0,0.040752001106739044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,2,128,1,float16,float16,0,0.04268800218900045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,2,128,1,float16,fp8,0,0.042170668641726174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,2,128,1,fp8,fp8,0,0.04031466692686081
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,4,128,1,float16,fp8,0,0.04377066592375437
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,4,128,1,fp8,fp8,0,0.042992000778516136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,4,128,1,float16,float16,0,0.0433599998553594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,8,128,1,float16,fp8,0,0.04413333535194397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,8,128,1,float16,float16,0,0.04388799766699473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,40,8,128,1,fp8,fp8,0,0.04367466767628988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,40,128,1,float16,float16,0,0.03339733431736628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,40,128,1,float16,fp8,0,0.032698666055997215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,1,128,1,float16,float16,0,0.02995733420054118
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,40,128,1,fp8,fp8,0,0.032314665615558624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,1,128,1,fp8,fp8,0,0.029989334444204967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,1,128,1,float16,fp8,0,0.030058667063713074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,2,128,1,float16,fp8,0,0.03046400099992752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,2,128,1,fp8,fp8,0,0.029861333469549816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,2,128,1,float16,float16,0,0.030159999926884968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,4,128,1,float16,float16,0,0.030159999926884968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,4,128,1,float16,fp8,0,0.030613332986831665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,4,128,1,fp8,fp8,0,0.03036266565322876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,8,128,1,float16,float16,0,0.031066666046778362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,8,128,1,fp8,fp8,0,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,40,8,128,1,float16,fp8,0,0.03086400032043457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,40,128,1,float16,float16,0,0.02498133232196172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,40,128,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,40,128,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,1,128,1,float16,float16,0,0.023397333920001984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,1,128,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,1,128,1,fp8,fp8,0,0.022517333428064983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,2,128,1,float16,float16,0,0.02345066765944163
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,2,128,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,4,128,1,float16,float16,0,0.023567999402681988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,2,128,1,fp8,fp8,0,0.02213866760333379
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,4,128,1,float16,fp8,0,0.02362666775782903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,4,128,1,fp8,fp8,0,0.0225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,8,128,1,float16,float16,0,0.023520000278949738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,8,128,1,float16,fp8,0,0.023919999599456787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,40,8,128,1,fp8,fp8,0,0.0229066660006841
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,1,128,1,float16,float16,0,0.7383999824523926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,1,128,1,fp8,fp8,0,0.753386656443278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,2,128,1,float16,float16,0,0.7429973284403483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,2,128,1,float16,fp8,0,0.7422613302866617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,1,128,1,float16,fp8,0,0.7378666400909424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,2,128,1,fp8,fp8,0,0.7698026498158773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,4,128,1,float16,float16,0,0.7527146339416504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,4,128,1,float16,fp8,0,0.7563626766204834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,1,128,1,float16,float16,0,0.38117865721384686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,4,128,1,fp8,fp8,0,0.7945173581441244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,8,128,1,float16,fp8,0,0.7868959903717041
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,8,128,1,float16,float16,0,0.7910613218943278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,40,8,128,1,fp8,fp8,0,0.803551991780599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,40,128,1,float16,fp8,0,0.4826986789703369
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,1,128,1,float16,fp8,0,0.3819733460744222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,40,128,1,fp8,fp8,0,0.4775199890136719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,40,128,1,float16,float16,0,0.5004266500473022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,1,128,1,fp8,fp8,0,0.38176532586415607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,2,128,1,float16,float16,0,0.38311465581258136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,2,128,1,float16,fp8,0,0.38285334904988605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,2,128,1,fp8,fp8,0,0.38815999031066895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,4,128,1,float16,float16,0,0.38811198870340985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,4,128,1,fp8,fp8,0,0.39903465906778973
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,4,128,1,float16,fp8,0,0.38969600200653076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,8,128,1,float16,float16,0,0.3955039978027344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,8,128,1,float16,fp8,0,0.3961600065231323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,1,128,1,float16,float16,0,0.20140800873438516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,40,8,128,1,fp8,fp8,0,0.4085013469060262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,40,128,1,float16,fp8,0,0.258730669816335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,1,128,1,float16,fp8,0,0.20237332582473755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,40,128,1,float16,float16,0,0.2648800015449524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,40,128,1,fp8,fp8,0,0.24811200300852457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,1,128,1,fp8,fp8,0,0.20317866404851279
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,2,128,1,float16,fp8,0,0.20383999745051065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,2,128,1,float16,float16,0,0.20294400056203207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,2,128,1,fp8,fp8,0,0.20497065782546997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,4,128,1,fp8,fp8,0,0.2085919976234436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,4,128,1,float16,float16,0,0.2063680092493693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,4,128,1,float16,fp8,0,0.20784533023834229
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,8,128,1,float16,float16,0,0.21006399393081665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,8,128,1,float16,fp8,0,0.20988265673319498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,40,8,128,1,fp8,fp8,0,0.21423999468485513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,1,128,1,float16,float16,0,0.11256000399589539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,40,128,1,float16,float16,0,0.15028267105420431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,1,128,1,float16,fp8,0,0.11363200346628825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,40,128,1,float16,fp8,0,0.1409173309803009
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,1,128,1,fp8,fp8,0,0.11390933394432068
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,40,128,1,fp8,fp8,0,0.13637866576512656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,2,128,1,float16,float16,0,0.1135093371073405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,2,128,1,float16,fp8,0,0.11376532912254333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,2,128,1,fp8,fp8,0,0.11446932951609294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,4,128,1,float16,float16,0,0.11423466602961223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,4,128,1,fp8,fp8,0,0.11607467134793599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,8,128,1,float16,float16,0,0.1165066659450531
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,4,128,1,float16,fp8,0,0.11507733662923177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,8,128,1,float16,fp8,0,0.11656000216801961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,40,128,1,float16,float16,0,0.08508267005284627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,40,8,128,1,fp8,fp8,0,0.11834133664766948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,1,128,1,float16,float16,0,0.0647573322057724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,40,128,1,float16,fp8,0,0.08311466872692108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,1,128,1,float16,fp8,0,0.06458133459091187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,40,128,1,fp8,fp8,0,0.07821333408355713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,2,128,1,float16,float16,0,0.06483200192451477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,1,128,1,fp8,fp8,0,0.06427733103434245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,2,128,1,float16,fp8,0,0.06555733581384023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,4,128,1,float16,float16,0,0.06584000090758006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,4,128,1,float16,fp8,0,0.0664106657107671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,2,128,1,fp8,fp8,0,0.06374933322270711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,4,128,1,fp8,fp8,0,0.06534933547178905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,8,128,1,float16,fp8,0,0.067221333583196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,8,128,1,fp8,fp8,0,0.0673173318306605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,40,8,128,1,float16,float16,0,0.06835733354091644
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,40,128,1,float16,float16,0,0.048101335763931274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,40,128,1,float16,fp8,0,0.04710933566093445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,1,128,1,float16,fp8,0,0.040362666050593056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,1,128,1,fp8,fp8,0,0.03980266551176707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,40,128,1,fp8,fp8,0,0.0484746644894282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,1,128,1,float16,float16,0,0.040778666734695435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,2,128,1,float16,float16,0,0.040522667268911995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,2,128,1,fp8,fp8,0,0.0408746674656868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,4,128,1,float16,float16,0,0.041562666495641075
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,4,128,1,float16,fp8,0,0.041434665520985924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,4,128,1,fp8,fp8,0,0.0421066681543986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,2,128,1,float16,fp8,0,0.04115733255942663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,8,128,1,float16,float16,0,0.04154133299986521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,8,128,1,fp8,fp8,0,0.04366933306058248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,40,8,128,1,float16,fp8,0,0.04252266883850098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,40,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,40,128,1,float16,fp8,0,0.033002667129039764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,1,128,1,float16,float16,0,0.030016000072161358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,40,128,1,fp8,fp8,0,0.032138665517171226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,1,128,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,2,128,1,float16,float16,0,0.030533333619435627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,2,128,1,fp8,fp8,0,0.029887999097506206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,1,128,1,float16,fp8,0,0.030917334059874218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,2,128,1,float16,fp8,0,0.031162666777769726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,4,128,1,float16,float16,0,0.030479999879995983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,4,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,8,128,1,float16,float16,0,0.03137599925200144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,4,128,1,fp8,fp8,0,0.03028800090154012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,8,128,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,40,8,128,1,fp8,fp8,0,0.03179199993610382
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,40,128,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,1,128,1,float16,fp8,0,0.022218666970729828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,1,128,1,float16,float16,0,0.02204799900452296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,1,128,1,fp8,fp8,0,0.02205866575241089
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,2,128,1,float16,float16,0,0.022416000564893086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,40,128,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,40,128,1,float16,float16,0,0.022511998812357586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,2,128,1,float16,fp8,0,0.022453332940737408
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,4,128,1,float16,float16,0,0.022117334107557934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,2,128,1,fp8,fp8,0,0.021925332645575207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,4,128,1,float16,fp8,0,0.02258133391539256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,4,128,1,fp8,fp8,0,0.02256533255179723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,8,128,1,float16,float16,0,0.02256533255179723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,40,128,1,float16,float16,0,0.020186666399240494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,8,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,40,128,1,float16,fp8,0,0.019946667055288952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,40,8,128,1,fp8,fp8,0,0.02314666658639908
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,1,128,1,float16,float16,0,0.019962667177120846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,40,128,1,fp8,fp8,0,0.020037333170572918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,1,128,1,float16,fp8,0,0.01966933285196622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,1,128,1,fp8,fp8,0,0.01978133370478948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,2,128,1,float16,float16,0,0.019941333681344986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,2,128,1,fp8,fp8,0,0.01953599974513054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,2,128,1,float16,fp8,0,0.019909333437681198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,8,128,1,float16,fp8,0,0.020080000162124634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,4,128,1,fp8,fp8,0,0.019776000330845516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,4,128,1,float16,fp8,0,0.020106667031844456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,8,128,1,float16,float16,0,0.01964266722400983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,8,128,1,fp8,fp8,0,0.020261333634455998
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,40,4,128,1,float16,float16,0,0.019744000087181728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,1,128,1,float16,float16,0,0.48204267024993896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,1,128,1,fp8,fp8,0,0.5108160177866617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,2,128,1,float16,float16,0,0.4843786557515462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,2,128,1,float16,fp8,0,0.4836266835530599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,2,128,1,fp8,fp8,0,0.5194613138834635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,4,128,1,float16,float16,0,0.4899093310038249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,1,128,1,float16,fp8,0,0.48181335131327313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,4,128,1,float16,fp8,0,0.4894186655680339
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,4,128,1,fp8,fp8,0,0.5337973435719808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,1,128,1,float16,float16,0,0.24908800919850668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,8,128,1,float16,fp8,0,0.49694931507110596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,8,128,1,fp8,fp8,0,0.5416533152262369
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,40,8,128,1,float16,float16,0,0.49817601839701336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,40,128,1,float16,float16,0,0.2847093343734741
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,40,128,1,float16,fp8,0,0.2762239972750346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,40,128,1,fp8,fp8,0,0.31382934252421063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,1,128,1,float16,fp8,0,0.24884267648061117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,2,128,1,float16,float16,0,0.25061333179473877
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,2,128,1,float16,fp8,0,0.25037866830825806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,1,128,1,fp8,fp8,0,0.2667413353919983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,4,128,1,float16,float16,0,0.25380265712738037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,4,128,1,float16,fp8,0,0.25577600797017414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,2,128,1,fp8,fp8,0,0.27133333683013916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,4,128,1,fp8,fp8,0,0.27457600831985474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,8,128,1,float16,float16,0,0.25674666961034137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,8,128,1,fp8,fp8,0,0.2784053285916646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,40,8,128,1,float16,fp8,0,0.25684799750645954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,1,128,1,float16,float16,0,0.13520532846450806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,40,128,1,float16,float16,0,0.15296533703804016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,1,128,1,float16,fp8,0,0.13527466853459677
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,1,128,1,fp8,fp8,0,0.14668800433476767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,40,128,1,float16,fp8,0,0.14828800161679587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,40,128,1,fp8,fp8,0,0.16765334208806357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,2,128,1,float16,float16,0,0.13597333431243896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,2,128,1,float16,fp8,0,0.13613866766293845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,4,128,1,float16,float16,0,0.13725333412488303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,4,128,1,fp8,fp8,0,0.1487573285897573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,8,128,1,float16,float16,0,0.13773333032925925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,8,128,1,float16,fp8,0,0.1381280024846395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,4,128,1,float16,fp8,0,0.13801599542299905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,2,128,1,fp8,fp8,0,0.14660267035166422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,40,8,128,1,fp8,fp8,0,0.14993600050608316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,1,128,1,float16,float16,0,0.07559466858704884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,40,128,1,float16,float16,0,0.08610133330027263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,1,128,1,float16,fp8,0,0.07630399862925212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,40,128,1,float16,fp8,0,0.08497066299120586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,1,128,1,fp8,fp8,0,0.08148266871770223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,40,128,1,fp8,fp8,0,0.09403733412424724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,2,128,1,float16,float16,0,0.0763733337322871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,2,128,1,float16,fp8,0,0.07652799785137177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,2,128,1,fp8,fp8,0,0.08198399841785431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,4,128,1,float16,float16,0,0.07632533212502797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,4,128,1,float16,fp8,0,0.07823466757933299
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,8,128,1,float16,fp8,0,0.07806399961312611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,8,128,1,float16,float16,0,0.07808533310890198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,4,128,1,fp8,fp8,0,0.08344533046086629
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,40,8,128,1,fp8,fp8,0,0.08616000413894653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,40,128,1,float16,float16,0,0.048170665899912514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,40,128,1,float16,fp8,0,0.04673066735267639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,1,128,1,float16,float16,0,0.044624000787734985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,40,128,1,fp8,fp8,0,0.05473066866397858
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,1,128,1,float16,fp8,0,0.0444213350613912
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,1,128,1,fp8,fp8,0,0.0469706654548645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,2,128,1,fp8,fp8,0,0.047279998660087585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,4,128,1,float16,float16,0,0.045279999574025474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,2,128,1,float16,fp8,0,0.04483733574549357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,4,128,1,float16,fp8,0,0.04598399996757507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,4,128,1,fp8,fp8,0,0.04905066887537638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,8,128,1,float16,float16,0,0.04625066618124644
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,8,128,1,float16,fp8,0,0.04650666813055674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,2,128,1,float16,float16,0,0.04454400142033895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,40,8,128,1,fp8,fp8,0,0.05144000053405762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,40,128,1,float16,float16,0,0.032218667368094124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,1,128,1,float16,fp8,0,0.03169066707293192
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,40,128,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,1,128,1,fp8,fp8,0,0.03336533407370249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,1,128,1,float16,float16,0,0.03134933362404505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,40,128,1,float16,fp8,0,0.032261334359645844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,4,128,1,float16,float16,0,0.03166933357715607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,2,128,1,float16,float16,0,0.03166399896144867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,4,128,1,float16,fp8,0,0.03215999901294708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,4,128,1,fp8,fp8,0,0.03409066547950109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,2,128,1,float16,fp8,0,0.0310506671667099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,8,128,1,float16,float16,0,0.03243733445803324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,8,128,1,float16,fp8,0,0.032629333436489105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,2,128,1,fp8,fp8,0,0.03287466615438461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,40,8,128,1,fp8,fp8,0,0.03573866685231527
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,40,128,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,40,128,1,float16,fp8,0,0.025770666698614757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,1,128,1,float16,fp8,0,0.025114665428797405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,1,128,1,float16,float16,0,0.024362665911515553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,2,128,1,float16,float16,0,0.024864000578721363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,40,128,1,float16,float16,0,0.025770666698614757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,1,128,1,fp8,fp8,0,0.02568000058333079
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,2,128,1,float16,fp8,0,0.025050667424996693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,2,128,1,fp8,fp8,0,0.026122666895389557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,4,128,1,float16,float16,0,0.025285333395004272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,4,128,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,4,128,1,fp8,fp8,0,0.02604266752799352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,8,128,1,float16,float16,0,0.025386666258176167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,8,128,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,40,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,40,128,1,float16,fp8,0,0.019573333362738293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,40,8,128,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,40,128,1,fp8,fp8,0,0.02086399992307027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,1,128,1,float16,fp8,0,0.01869333287080129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,1,128,1,float16,float16,0,0.01860800012946129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,1,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,2,128,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,2,128,1,fp8,fp8,0,0.019850666324297588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,4,128,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,8,128,1,float16,float16,0,0.01850133389234543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,4,128,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,4,128,1,float16,float16,0,0.01878400022784869
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,2,128,1,float16,float16,0,0.018698666244745255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,8,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,40,8,128,1,fp8,fp8,0,0.019813333948453266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,40,128,1,float16,float16,0,0.018277333428462345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,40,128,1,float16,fp8,0,0.0184906671444575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,40,128,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,1,128,1,float16,float16,0,0.017994667092959087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,2,128,1,float16,fp8,0,0.018309333672126133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,1,128,1,float16,fp8,0,0.01821333294113477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,2,128,1,float16,float16,0,0.018298666924238205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,4,128,1,float16,float16,0,0.0182239996890227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,2,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,4,128,1,float16,fp8,0,0.018298666924238205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,1,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,4,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,8,128,1,float16,float16,0,0.018277333428462345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,8,128,1,fp8,fp8,0,0.018746666610240936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,40,8,128,1,float16,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,1,128,1,float16,float16,0,0.3291253248850505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,1,128,1,float16,fp8,0,0.32892266909281415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,1,128,1,fp8,fp8,0,0.4145866632461548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,2,128,1,float16,fp8,0,0.3344000180562337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,2,128,1,float16,float16,0,0.33469335238138836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,2,128,1,fp8,fp8,0,0.42074668407440186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,4,128,1,float16,float16,0,0.34116268157958984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,4,128,1,float16,fp8,0,0.342085321744283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,4,128,1,fp8,fp8,0,0.4259626468022664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,8,128,1,float16,fp8,0,0.348037322362264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,8,128,1,float16,float16,0,0.3481066624323527
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,40,8,128,1,fp8,fp8,0,0.42900268236796063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,40,128,1,float16,fp8,0,0.1869973341623942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,1,128,1,float16,float16,0,0.17305066188176474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,40,128,1,fp8,fp8,0,0.2404373288154602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,40,128,1,float16,float16,0,0.1928000052769979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,1,128,1,fp8,fp8,0,0.22005865971247354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,1,128,1,float16,fp8,0,0.1732106606165568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,2,128,1,float16,float16,0,0.1751520037651062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,2,128,1,float16,fp8,0,0.17509333292643228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,2,128,1,fp8,fp8,0,0.2207253376642863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,4,128,1,float16,fp8,0,0.178874671459198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,4,128,1,float16,float16,0,0.17826666434605917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,8,128,1,float16,float16,0,0.1823520064353943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,8,128,1,float16,fp8,0,0.1816640098889669
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,8,128,1,fp8,fp8,0,0.22475733359654745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,40,4,128,1,fp8,fp8,0,0.22215465704600015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,40,128,1,float16,float16,0,0.1065066655476888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,40,128,1,float16,fp8,0,0.10448533296585083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,1,128,1,float16,fp8,0,0.09413333733876546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,1,128,1,fp8,fp8,0,0.12052266796429952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,2,128,1,float16,float16,0,0.09397866328557332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,40,128,1,fp8,fp8,0,0.13196266690889993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,1,128,1,float16,float16,0,0.09340266386667888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,2,128,1,fp8,fp8,0,0.12122133374214172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,2,128,1,float16,fp8,0,0.09378133217493693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,4,128,1,float16,float16,0,0.09455999732017517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,4,128,1,float16,fp8,0,0.09497066338857015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,4,128,1,fp8,fp8,0,0.1222826639811198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,8,128,1,float16,fp8,0,0.0976639986038208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,40,128,1,float16,float16,0,0.057589332262674965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,8,128,1,float16,float16,0,0.09778666496276855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,40,8,128,1,fp8,fp8,0,0.12386666735013326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,1,128,1,float16,float16,0,0.05309333403905233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,1,128,1,float16,fp8,0,0.053690666953722634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,40,128,1,fp8,fp8,0,0.07363733152548473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,1,128,1,fp8,fp8,0,0.06666133304437001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,2,128,1,float16,float16,0,0.05283733208974203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,40,128,1,float16,fp8,0,0.05625066657861074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,2,128,1,float16,fp8,0,0.05366399884223938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,4,128,1,float16,float16,0,0.053871999184290566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,4,128,1,float16,fp8,0,0.05470933516820272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,2,128,1,fp8,fp8,0,0.06781866649786632
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,4,128,1,fp8,fp8,0,0.06911466519037883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,8,128,1,float16,float16,0,0.054901331663131714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,8,128,1,float16,fp8,0,0.055344000458717346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,40,128,1,float16,float16,0,0.03522666543722153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,1,128,1,float16,fp8,0,0.034858666360378265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,40,128,1,float16,fp8,0,0.03551466763019562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,40,128,1,fp8,fp8,0,0.044031997521718345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,1,128,1,fp8,fp8,0,0.04111466556787491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,40,8,128,1,fp8,fp8,0,0.07158933579921722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,2,128,1,float16,float16,0,0.034927998979886375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,1,128,1,float16,float16,0,0.03458133339881897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,2,128,1,float16,fp8,0,0.03469333300987879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,2,128,1,fp8,fp8,0,0.04134399940570196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,4,128,1,float16,float16,0,0.03482133398453394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,4,128,1,float16,fp8,0,0.035205334424972534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,4,128,1,fp8,fp8,0,0.041722665230433144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,8,128,1,float16,fp8,0,0.03590933233499527
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,8,128,1,fp8,fp8,0,0.04332800209522247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,40,128,1,float16,float16,0,0.026367999613285065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,40,8,128,1,float16,float16,0,0.035391998787721
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,40,128,1,float16,fp8,0,0.02681066592534383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,1,128,1,float16,float16,0,0.02608533451954524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,40,128,1,fp8,fp8,0,0.031370667119820915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,1,128,1,float16,fp8,0,0.026213333010673523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,1,128,1,fp8,fp8,0,0.029951999584833782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,2,128,1,float16,fp8,0,0.026202666262785595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,4,128,1,float16,float16,0,0.02626666675011317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,4,128,1,float16,fp8,0,0.026687999566396076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,4,128,1,fp8,fp8,0,0.03052799900372823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,8,128,1,float16,float16,0,0.026186667382717133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,2,128,1,fp8,fp8,0,0.030661332110563915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,2,128,1,float16,float16,0,0.026127999027570088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,8,128,1,float16,fp8,0,0.026485333840052288
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,40,8,128,1,fp8,fp8,0,0.0313226655125618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,40,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,40,128,1,float16,fp8,0,0.021744000415007275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,40,128,1,fp8,fp8,0,0.024314666787783306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,1,128,1,float16,float16,0,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,1,128,1,float16,fp8,0,0.02090666691462199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,2,128,1,float16,float16,0,0.020330666253964107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,1,128,1,fp8,fp8,0,0.023936000963052113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,2,128,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,2,128,1,fp8,fp8,0,0.023669332265853882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,4,128,1,float16,float16,0,0.021370666722456615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,4,128,1,float16,fp8,0,0.021520001192887623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,4,128,1,fp8,fp8,0,0.023711999257405598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,8,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,8,128,1,float16,float16,0,0.02147199958562851
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,40,128,1,float16,float16,0,0.017914666483799618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,40,128,1,float16,fp8,0,0.01821333294113477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,40,128,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,1,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,1,128,1,float16,fp8,0,0.01785600061217944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,40,8,128,1,fp8,fp8,0,0.02401600033044815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,1,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,2,128,1,float16,float16,0,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,2,128,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,2,128,1,float16,fp8,0,0.018058666338523228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,4,128,1,float16,fp8,0,0.017605333278576534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,4,128,1,float16,float16,0,0.017616000026464462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,4,128,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,8,128,1,float16,fp8,0,0.01764800027012825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,8,128,1,float16,float16,0,0.017498667041460674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,40,128,1,float16,float16,0,0.01637866720557213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,40,128,1,fp8,fp8,0,0.01815466706951459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,1,128,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,40,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,1,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,40,8,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,1,128,1,fp8,fp8,0,0.018645333747069042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,2,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,2,128,1,float16,float16,0,0.016735999534527462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,2,128,1,fp8,fp8,0,0.018160000443458557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,4,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,4,128,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,4,128,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,8,128,1,float16,fp8,0,0.01754666616519292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,8,128,1,fp8,fp8,0,0.01869333287080129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,40,8,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,1,128,1,float16,fp8,0,0.27878934144973755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,2,128,1,float16,float16,0,0.2810080051422119
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,1,128,1,fp8,fp8,0,0.3692213296890259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,2,128,1,float16,fp8,0,0.2810186743736267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,1,128,1,float16,float16,0,0.2781493266423543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,2,128,1,fp8,fp8,0,0.369210680325826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,4,128,1,float16,float16,0,0.2837226589520772
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,4,128,1,fp8,fp8,0,0.3717333475748698
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,8,128,1,float16,fp8,0,0.2874506711959839
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,8,128,1,float16,float16,0,0.28759467601776123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,4,128,1,float16,fp8,0,0.2849493424097697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,40,8,128,1,fp8,fp8,0,0.37240533034006756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,40,128,1,float16,fp8,0,0.14756799737612405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,40,128,1,fp8,fp8,0,0.20590933163960776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,40,128,1,float16,float16,0,0.14933866262435913
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,1,128,1,float16,float16,0,0.14612266421318054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,1,128,1,fp8,fp8,0,0.19620267550150552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,1,128,1,float16,fp8,0,0.14571733276049295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,2,128,1,float16,fp8,0,0.14632532993952432
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,2,128,1,fp8,fp8,0,0.19595199823379517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,2,128,1,float16,float16,0,0.14627200365066528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,4,128,1,float16,float16,0,0.146506667137146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,4,128,1,float16,fp8,0,0.14711466431617737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,4,128,1,fp8,fp8,0,0.19819200038909912
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,8,128,1,float16,fp8,0,0.15016000469525656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,8,128,1,float16,float16,0,0.1493333379427592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,40,8,128,1,fp8,fp8,0,0.1987733244895935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,40,128,1,float16,float16,0,0.08017066617806752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,1,128,1,float16,float16,0,0.07966400186220805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,40,128,1,float16,fp8,0,0.07870933413505554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,40,128,1,fp8,fp8,0,0.11281599601109822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,1,128,1,float16,fp8,0,0.08006933331489563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,1,128,1,fp8,fp8,0,0.107013334830602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,2,128,1,float16,fp8,0,0.08027199904123943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,2,128,1,float16,float16,0,0.08039466540018718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,2,128,1,fp8,fp8,0,0.10709866881370544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,4,128,1,float16,float16,0,0.08124800026416779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,4,128,1,fp8,fp8,0,0.1088053286075592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,8,128,1,float16,float16,0,0.08111466467380524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,4,128,1,float16,fp8,0,0.08166400094827016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,8,128,1,float16,fp8,0,0.08228800197442372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,40,8,128,1,fp8,fp8,0,0.11000532905260722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,40,128,1,float16,fp8,0,0.04737600187460581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,40,128,1,float16,float16,0,0.046309332052866616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,40,128,1,fp8,fp8,0,0.06392000118891399
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,1,128,1,float16,float16,0,0.047728002071380615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,1,128,1,float16,fp8,0,0.04799466828505198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,2,128,1,float16,float16,0,0.0476693312327067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,2,128,1,float16,fp8,0,0.04731733103593191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,2,128,1,fp8,fp8,0,0.0611413319905599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,4,128,1,float16,float16,0,0.0487306664387385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,1,128,1,fp8,fp8,0,0.061333333452542625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,4,128,1,fp8,fp8,0,0.0618399977684021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,4,128,1,float16,fp8,0,0.04896533489227295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,8,128,1,float16,float16,0,0.04828266799449921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,8,128,1,float16,fp8,0,0.04920533299446106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,40,8,128,1,fp8,fp8,0,0.06327466666698456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,40,128,1,float16,float16,0,0.03189333279927572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,40,128,1,float16,fp8,0,0.0316746657093366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,1,128,1,float16,float16,0,0.031898667414983116
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,1,128,1,fp8,fp8,0,0.03845866769552231
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,1,128,1,float16,fp8,0,0.03239466746648153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,2,128,1,float16,float16,0,0.03243733445803324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,40,128,1,fp8,fp8,0,0.03968533376852671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,2,128,1,float16,fp8,0,0.032399999598662056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,2,128,1,fp8,fp8,0,0.038746667404969536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,4,128,1,float16,float16,0,0.032405334214369454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,4,128,1,float16,fp8,0,0.032799998919169106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,4,128,1,fp8,fp8,0,0.039018665750821434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,8,128,1,float16,float16,0,0.03212266663710276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,8,128,1,float16,fp8,0,0.033088001112143196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,40,128,1,float16,float16,0,0.023728000621000927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,1,128,1,float16,float16,0,0.023818666736284893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,40,128,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,40,128,1,fp8,fp8,0,0.028592000404993694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,40,8,128,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,1,128,1,float16,fp8,0,0.02366400013367335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,2,128,1,float16,float16,0,0.02366400013367335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,1,128,1,fp8,fp8,0,0.02789866675933202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,2,128,1,fp8,fp8,0,0.027749332288901012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,2,128,1,float16,fp8,0,0.02369066576162974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,4,128,1,float16,float16,0,0.023498666783173878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,4,128,1,float16,fp8,0,0.02443733314673106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,8,128,1,float16,fp8,0,0.024293333292007446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,8,128,1,float16,float16,0,0.023930666347344715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,4,128,1,fp8,fp8,0,0.02807466685771942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,40,128,1,float16,float16,0,0.01989866668979327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,40,128,1,float16,fp8,0,0.020138667275508244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,40,128,1,fp8,fp8,0,0.023573334018389385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,1,128,1,float16,float16,0,0.020256000260512035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,40,8,128,1,fp8,fp8,0,0.029018667836983997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,1,128,1,float16,fp8,0,0.020810666183630627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,1,128,1,fp8,fp8,0,0.023007998863856
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,2,128,1,float16,fp8,0,0.020341333001852036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,2,128,1,fp8,fp8,0,0.02239466706911723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,4,128,1,float16,float16,0,0.01969066634774208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,2,128,1,float16,float16,0,0.0205226664741834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,4,128,1,float16,fp8,0,0.020560000091791153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,4,128,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,8,128,1,float16,float16,0,0.020949333906173706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,8,128,1,float16,fp8,0,0.020655999581019085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,40,8,128,1,fp8,fp8,0,0.02372266600529353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,40,128,1,float16,float16,0,0.016645333419243496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,40,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,1,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,1,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,1,128,1,fp8,fp8,0,0.01860800012946129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,40,128,1,fp8,fp8,0,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,2,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,2,128,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,4,128,1,float16,float16,0,0.01670933390657107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,4,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,8,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,4,128,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,8,128,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,40,128,1,float16,float16,0,0.015840000162522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,40,8,128,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,40,128,1,fp8,fp8,0,0.01788266624013583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,1,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,40,128,1,float16,fp8,0,0.016496000190575916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,2,128,1,float16,float16,0,0.01640533283352852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,1,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,2,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,1,128,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,4,128,1,float16,float16,0,0.016506666938463848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,4,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,4,128,1,fp8,fp8,0,0.01828266680240631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,2,128,1,fp8,fp8,0,0.01809599995613098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,8,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,8,128,1,fp8,fp8,0,0.018186666071414948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,40,8,128,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,128,1,float16,fp8,0,33.17845916748047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,128,1,fp8,fp8,0,21.979583740234375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,128,1,float16,float16,0,34.01402791341146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,128,1,float16,float16,0,33.233812967936196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,128,1,float16,fp8,0,33.61283620198568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,128,1,fp8,fp8,0,21.983428955078125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,128,1,float16,fp8,0,33.40520985921224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,128,1,float16,float16,0,33.44054412841797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,128,1,float16,float16,0,16.70262908935547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,128,1,fp8,fp8,0,22.074330647786457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,128,1,float16,fp8,0,16.712623596191406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,128,1,fp8,fp8,0,22.253712972005207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,128,1,float16,fp8,0,16.67080561319987
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,128,1,float16,float16,0,16.84991455078125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,128,1,fp8,fp8,0,11.378607432047525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,128,1,float16,float16,0,33.27926890055338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,128,1,float16,fp8,0,33.85429890950521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,128,1,fp8,fp8,0,11.106063842773438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,128,1,float16,fp8,0,16.654800415039062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,128,1,float16,float16,0,16.7291997273763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,128,1,float16,fp8,0,16.826527913411457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,128,1,fp8,fp8,0,11.172779083251953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,128,1,float16,float16,0,16.91818618774414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,128,1,fp8,fp8,0,11.231098175048828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,128,1,float16,float16,0,17.105818430582683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,128,1,float16,fp8,0,16.753803253173828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,128,1,float16,float16,0,8.425210952758789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,128,1,fp8,fp8,0,5.676399866739909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,128,1,float16,fp8,0,8.335338592529297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,128,1,fp8,fp8,0,11.222304026285807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,128,1,float16,float16,0,8.389653523763021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,128,1,float16,float16,0,8.481903711954752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,128,1,float16,fp8,0,8.481834411621094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,128,1,fp8,fp8,0,5.6140696207682295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,128,1,float16,fp8,0,8.469930648803711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,128,1,fp8,fp8,0,5.605466842651367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,128,1,float16,float16,0,8.56650161743164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,128,1,fp8,fp8,0,5.514837265014648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,128,1,float16,fp8,0,8.436906814575195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,128,1,fp8,fp8,0,5.48684819539388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,128,1,float16,fp8,0,8.374149322509766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,128,1,float16,float16,0,8.413418451944986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,128,1,float16,float16,0,4.157157262166341
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,128,1,float16,float16,0,4.213146527608235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,128,1,float16,fp8,0,4.192367871602376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,128,1,float16,fp8,0,4.323408126831055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,128,1,fp8,fp8,0,2.8663466771443686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,128,1,fp8,fp8,0,2.8764321009318032
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,128,1,float16,float16,0,4.207973480224609
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,128,1,float16,fp8,0,4.222400029500325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,128,1,fp8,fp8,0,2.8696587880452475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,128,1,fp8,fp8,0,2.87934939066569
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,128,1,float16,float16,0,4.19212277730306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,128,1,float16,fp8,0,4.240303993225098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,128,1,float16,float16,0,4.223082542419434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,128,1,float16,fp8,0,4.274853388468425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,128,1,fp8,fp8,0,2.8815733591715493
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,128,1,float16,float16,0,19.183563232421875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,128,1,fp8,fp8,0,13.281397501627604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,128,1,float16,fp8,0,19.24187723795573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,128,1,float16,fp8,0,19.137344360351562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,128,1,float16,float16,0,19.618784586588543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,128,1,fp8,fp8,0,12.915392557779947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,128,1,float16,float16,0,19.118228912353516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,128,1,float16,fp8,0,19.229834238688152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,128,1,float16,float16,0,9.713530858357748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,128,1,fp8,fp8,0,13.016597747802734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,128,1,float16,float16,0,19.768997192382812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,128,1,float16,fp8,0,19.367989857991535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,128,1,fp8,fp8,0,13.093269348144531
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,128,1,float16,fp8,0,9.606128056844076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,128,1,float16,fp8,0,9.655365626017252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,128,1,fp8,fp8,0,6.394410451253255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,128,1,float16,float16,0,9.864362716674805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,128,1,fp8,fp8,0,6.6002451578776045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,128,1,float16,float16,0,9.870896021525065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,128,1,float16,fp8,0,9.608442942301432
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,128,1,fp8,fp8,0,6.482885360717773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,128,1,float16,float16,0,9.758719762166342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,128,1,float16,fp8,0,9.56271998087565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,128,1,fp8,fp8,0,6.437685648600261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,128,1,float16,float16,0,9.915546417236328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,128,1,float16,fp8,0,9.722880045572916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,128,1,float16,float16,0,4.644421259562175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,128,1,float16,fp8,0,4.637125333150228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,128,1,fp8,fp8,0,6.660197575887044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,128,1,float16,fp8,0,4.911301294962565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,128,1,float16,float16,0,4.84388796488444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,128,1,fp8,fp8,0,3.32588259379069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,128,1,fp8,fp8,0,3.245386759440104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,128,1,float16,float16,0,4.8493601481119795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,128,1,fp8,fp8,0,3.2455787658691406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,128,1,float16,fp8,0,4.650597254435222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,128,1,float16,float16,0,4.8491465250651045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,128,1,float16,fp8,0,4.630000114440918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,128,1,fp8,fp8,0,3.257472038269043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,128,1,float16,float16,0,4.831205368041992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,128,1,float16,fp8,0,4.9398454030354815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,128,1,float16,float16,0,2.4437813758850098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,128,1,float16,float16,0,2.4577439626057944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,128,1,fp8,fp8,0,3.2688000996907554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,128,1,float16,fp8,0,2.4518240292867026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,128,1,float16,fp8,0,2.428127924601237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,128,1,fp8,fp8,0,1.7531466484069824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,128,1,fp8,fp8,0,1.7745760281880696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,128,1,float16,fp8,0,2.441530704498291
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,128,1,float16,float16,0,2.4974826176961265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,128,1,fp8,fp8,0,1.7547787030537922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,128,1,fp8,fp8,0,1.7492213249206543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,128,1,float16,fp8,0,2.4559200604756675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,128,1,float16,float16,0,2.46012274424235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,128,1,float16,float16,0,2.474415938059489
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,128,1,float16,fp8,0,2.492853323618571
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,128,1,fp8,fp8,0,1.759749412536621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,128,1,float16,fp8,0,13.584842681884766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,128,1,fp8,fp8,0,9.197178522745768
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,128,1,float16,float16,0,13.836597442626953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,128,1,float16,float16,0,13.669204711914062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,128,1,fp8,fp8,0,9.306010564168295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,128,1,float16,fp8,0,13.600298563639322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,128,1,float16,fp8,0,13.625802357991537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,128,1,float16,float16,0,14.052565256754557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,128,1,float16,float16,0,6.980741500854492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,128,1,fp8,fp8,0,9.280778884887695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,128,1,float16,float16,0,13.7226931254069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,128,1,fp8,fp8,0,9.417749404907227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,128,1,float16,float16,0,6.864490509033203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,128,1,float16,fp8,0,6.7994028727213545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,128,1,float16,fp8,0,13.901055653889975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,128,1,fp8,fp8,0,4.5661013921101885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,128,1,float16,fp8,0,6.919066747029622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,128,1,fp8,fp8,0,4.735098520914714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,128,1,float16,float16,0,6.8714345296223955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,128,1,fp8,fp8,0,4.572239875793457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,128,1,float16,fp8,0,6.844901402791341
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,128,1,float16,float16,0,6.750586827596028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,128,1,float16,fp8,0,6.799589157104492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,128,1,fp8,fp8,0,4.589813232421875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,128,1,float16,fp8,0,6.704751968383789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,128,1,float16,float16,0,7.016880035400391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,128,1,float16,float16,0,3.393829345703125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,128,1,float16,fp8,0,3.296597480773926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,128,1,fp8,fp8,0,4.645477294921875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,128,1,float16,fp8,0,3.3450826009114585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,128,1,float16,float16,0,3.391066551208496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,128,1,fp8,fp8,0,2.372192064921061
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,128,1,float16,fp8,0,3.3363466262817383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,128,1,fp8,fp8,0,2.413130601247152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,128,1,float16,float16,0,3.4300479888916016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,128,1,fp8,fp8,0,2.3654026985168457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,128,1,float16,float16,0,3.336378733317057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,128,1,fp8,fp8,0,2.3725226720174155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,128,1,float16,fp8,0,3.3284425735473633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,128,1,float16,float16,0,3.344106674194336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,128,1,float16,float16,0,1.7863574028015137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,128,1,float16,fp8,0,1.7723894119262695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,128,1,float16,fp8,0,3.3887198766072593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,128,1,fp8,fp8,0,2.3814239501953125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,128,1,fp8,fp8,0,1.2905386288960774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,128,1,float16,fp8,0,1.7639147440592449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,128,1,float16,float16,0,1.7976373036702473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,128,1,fp8,fp8,0,1.311461369196574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,128,1,float16,float16,0,1.7886773745218914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,128,1,float16,fp8,0,1.7760640780131023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,128,1,fp8,fp8,0,1.2924586931864421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,128,1,float16,float16,0,1.7852373123168945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,128,1,fp8,fp8,0,1.2982079982757568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,128,1,float16,fp8,0,1.7674026489257812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,128,1,float16,fp8,0,1.7720534006754558
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,128,1,float16,float16,0,1.8093120257059734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,128,1,fp8,fp8,0,1.3003839651743572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,128,1,float16,float16,0,17.910330454508465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,128,1,fp8,fp8,0,12.251696268717447
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,128,1,float16,fp8,0,18.005669911702473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,128,1,float16,float16,0,17.97528584798177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,128,1,fp8,fp8,0,12.366517384847006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,128,1,float16,fp8,0,17.817184448242188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,128,1,float16,fp8,0,17.937520345052082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,128,1,float16,float16,0,18.227792104085285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,128,1,float16,float16,0,9.202431996663412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,128,1,fp8,fp8,0,12.334356943766275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,128,1,fp8,fp8,0,12.511866251627604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,128,1,float16,fp8,0,18.18921661376953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,128,1,float16,float16,0,8.938682556152344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,128,1,float16,fp8,0,9.166320164998373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,128,1,float16,float16,0,17.987599690755207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,128,1,float16,fp8,0,9.014256159464518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,128,1,fp8,fp8,0,6.333791732788086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,128,1,fp8,fp8,0,6.186384201049805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,128,1,float16,float16,0,9.082293192545572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,128,1,fp8,fp8,0,6.222639719645183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,128,1,float16,fp8,0,9.044885635375977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,128,1,float16,float16,0,9.258160273234049
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,128,1,float16,fp8,0,9.031525293986002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,128,1,fp8,fp8,0,6.140554428100586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,128,1,float16,float16,0,9.075253168741861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,128,1,float16,fp8,0,9.056330362955729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,128,1,float16,float16,0,4.311024030049642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,128,1,float16,fp8,0,4.369381268819173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,128,1,fp8,fp8,0,3.0715198516845703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,128,1,fp8,fp8,0,6.176031748453776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,128,1,float16,float16,0,4.485941251118978
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,128,1,float16,fp8,0,4.592314720153809
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,128,1,float16,float16,0,4.291146596272786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,128,1,fp8,fp8,0,3.0819787979125977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,128,1,float16,fp8,0,4.396741231282552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,128,1,fp8,fp8,0,3.2020692825317383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,128,1,float16,float16,0,4.3192799886067705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,128,1,float16,fp8,0,4.262021382649739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,128,1,fp8,fp8,0,3.0992212295532227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,128,1,float16,fp8,0,4.4243520100911455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,128,1,float16,float16,0,4.533199946085612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,128,1,float16,float16,0,2.2352852821350098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,128,1,float16,float16,0,2.2414560317993164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,128,1,float16,fp8,0,2.2198613484700522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,128,1,fp8,fp8,0,3.1107521057128906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,128,1,fp8,fp8,0,1.668074607849121
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,128,1,float16,fp8,0,2.263493378957113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,128,1,fp8,fp8,0,1.6202826499938965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,128,1,float16,fp8,0,2.2121386528015137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,128,1,fp8,fp8,0,1.6278293927510579
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,128,1,float16,float16,0,2.2620372772216797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,128,1,float16,float16,0,2.2535200119018555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,128,1,float16,fp8,0,2.2165600458780923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,128,1,fp8,fp8,0,1.63265593846639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,128,1,float16,float16,0,1.215882698694865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,128,1,float16,fp8,0,2.221306641896566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,128,1,float16,float16,0,2.2798825899759927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,128,1,float16,float16,0,1.2110933462778728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,128,1,fp8,fp8,0,1.6397066116333008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,128,1,fp8,fp8,0,0.9211946328481039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,128,1,float16,fp8,0,1.2028319835662842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,128,1,float16,fp8,0,1.2174452940622966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,128,1,fp8,fp8,0,0.9024053414662679
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,128,1,float16,float16,0,1.2180853684743245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,128,1,fp8,fp8,0,0.9010879993438721
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,128,1,float16,fp8,0,1.2118559678395588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,128,1,float16,float16,0,1.2215733528137207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,128,1,float16,fp8,0,1.2096107006072998
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,128,1,fp8,fp8,0,0.9039253393809
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,128,1,float16,float16,0,1.2323946952819824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,128,1,float16,fp8,0,1.2072800000508626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,128,1,fp8,fp8,0,0.9058506488800049
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,128,1,float16,float16,0,10.693909962972006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,128,1,fp8,fp8,0,7.435706456502278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,128,1,float16,fp8,0,10.707626342773438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,128,1,float16,float16,0,10.662736256917318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,128,1,float16,fp8,0,10.534933090209961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,128,1,float16,float16,0,10.636181513468424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,128,1,fp8,fp8,0,7.438906351725261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,128,1,float16,fp8,0,10.590954462687174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,128,1,float16,float16,0,5.153130531311035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,128,1,fp8,fp8,0,7.444069544474284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,128,1,float16,float16,0,10.722063700358072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,128,1,float16,fp8,0,5.13922119140625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,128,1,fp8,fp8,0,7.43893305460612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,128,1,float16,fp8,0,5.131349245707194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,128,1,float16,float16,0,5.382346471150716
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,128,1,float16,fp8,0,10.814229329427084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,128,1,fp8,fp8,0,3.682997385660807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,128,1,fp8,fp8,0,3.844853401184082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,128,1,float16,float16,0,5.200618743896484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,128,1,float16,fp8,0,5.158224105834961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,128,1,fp8,fp8,0,3.6903839111328125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,128,1,float16,float16,0,5.215706825256348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,128,1,float16,fp8,0,5.266661326090495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,128,1,fp8,fp8,0,3.704421361287435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,128,1,float16,fp8,0,5.140474637349446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,128,1,float16,float16,0,5.141749382019043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,128,1,float16,float16,0,2.572725296020508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,128,1,float16,fp8,0,2.5683199564615884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,128,1,fp8,fp8,0,3.7582880655924478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,128,1,float16,float16,0,2.611290613810221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,128,1,fp8,fp8,0,1.8962559700012207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,128,1,float16,fp8,0,2.6281760533650718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,128,1,float16,float16,0,2.5689600308736167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,128,1,fp8,fp8,0,1.9949386914571126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,128,1,float16,fp8,0,2.5474133491516113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,128,1,fp8,fp8,0,1.903999964396159
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,128,1,float16,float16,0,2.586181322733561
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,128,1,float16,fp8,0,2.540325323740641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,128,1,fp8,fp8,0,1.9138719240824382
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,128,1,float16,float16,0,2.6017707188924155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,128,1,float16,fp8,0,2.554661273956299
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,128,1,float16,float16,0,1.3571359316507976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,128,1,fp8,fp8,0,1.9265012741088867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,128,1,float16,fp8,0,1.3800320625305176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,128,1,fp8,fp8,0,1.0174132982889812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,128,1,float16,fp8,0,1.3501067161560059
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,128,1,float16,float16,0,1.3730026880900066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,128,1,fp8,fp8,0,1.059871991475423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,128,1,float16,fp8,0,1.3394559224446614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,128,1,fp8,fp8,0,1.0195039908091228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,128,1,float16,float16,0,1.3596107165018718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,128,1,float16,float16,0,1.3766773541768391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,128,1,float16,fp8,0,1.3504799207051594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,128,1,fp8,fp8,0,1.0237706502278645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,128,1,float16,fp8,0,1.3558932940165203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,128,1,fp8,fp8,0,1.02838929494222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,128,1,float16,float16,0,0.7572480042775472
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,128,1,float16,float16,0,0.7589866320292155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,128,1,float16,float16,0,1.3690560658772786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,128,1,float16,fp8,0,0.7480053106943766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,128,1,fp8,fp8,0,0.5718079805374146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,128,1,fp8,fp8,0,0.5481919844945272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,128,1,float16,float16,0,0.7579893271128336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,128,1,float16,fp8,0,0.746789296468099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,128,1,float16,fp8,0,0.7482453187306722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,128,1,fp8,fp8,0,0.548746665318807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,128,1,float16,fp8,0,0.752079963684082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,128,1,float16,float16,0,0.7608106931050619
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,128,1,fp8,fp8,0,0.5512693325678507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,128,1,float16,float16,0,0.7618559996287028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,128,1,float16,fp8,0,0.7497599919637045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,128,1,fp8,fp8,0,0.5553280115127563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,128,1,float16,fp8,0,10.143920262654623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,128,1,fp8,fp8,0,7.493183771769206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,128,1,float16,fp8,0,10.378133138020834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,128,1,float16,float16,0,10.26248550415039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,128,1,float16,float16,0,10.396720250447592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,128,1,fp8,fp8,0,7.484266916910808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,128,1,float16,fp8,0,10.118544260660807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,128,1,float16,float16,0,10.30672518412272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,128,1,float16,float16,0,4.9766238530476885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,128,1,fp8,fp8,0,7.563850402832031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,128,1,float16,fp8,0,4.906554539998372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,128,1,float16,float16,0,10.512378692626953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,128,1,float16,fp8,0,4.982789357503255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,128,1,float16,fp8,0,10.353359858194986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,128,1,float16,float16,0,5.184282620747884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,128,1,fp8,fp8,0,7.55621337890625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,128,1,fp8,fp8,0,3.9721225102742515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,128,1,fp8,fp8,0,3.742778778076172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,128,1,float16,float16,0,4.883935928344727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,128,1,float16,fp8,0,5.061445236206055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,128,1,fp8,fp8,0,3.754624048868815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,128,1,float16,float16,0,5.15117867787679
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,128,1,fp8,fp8,0,3.765477180480957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,128,1,float16,fp8,0,5.044624010721843
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,128,1,float16,float16,0,4.933871905008952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,128,1,float16,fp8,0,4.998000144958496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,128,1,float16,float16,0,2.4717866579691568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,128,1,float16,fp8,0,2.437551975250244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,128,1,fp8,fp8,0,1.9003839492797852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,128,1,fp8,fp8,0,3.803248087565104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,128,1,float16,float16,0,2.489189306894938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,128,1,float16,fp8,0,2.5258026123046875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,128,1,float16,float16,0,2.580378691355387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,128,1,float16,fp8,0,2.4364800453186035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,128,1,fp8,fp8,0,2.03165864944458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,128,1,fp8,fp8,0,1.9045546849568684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,128,1,float16,float16,0,2.5005760192871094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,128,1,float16,fp8,0,2.4393653869628906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,128,1,fp8,fp8,0,1.9179360071818035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,128,1,float16,float16,0,2.4967360496520996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,128,1,float16,float16,0,1.2866613070170085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,128,1,fp8,fp8,0,1.9335254033406575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,128,1,float16,fp8,0,1.272655963897705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,128,1,float16,fp8,0,2.4893600145975747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,128,1,float16,float16,0,1.335744063059489
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,128,1,fp8,fp8,0,1.001578648885091
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,128,1,float16,float16,0,1.2902826468149822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,128,1,fp8,fp8,0,1.0716053644816081
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,128,1,float16,fp8,0,1.270405371983846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,128,1,fp8,fp8,0,1.002394676208496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,128,1,float16,fp8,0,1.3139466444651287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,128,1,float16,float16,0,1.304410696029663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,128,1,fp8,fp8,0,1.006773312886556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,128,1,float16,fp8,0,1.2852853139241536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,128,1,float16,float16,0,1.299621343612671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,128,1,float16,fp8,0,1.2836213111877441
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,128,1,float16,float16,0,0.7131733099619547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,128,1,fp8,fp8,0,1.017077366511027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,128,1,float16,float16,0,0.6988639831542969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,128,1,float16,fp8,0,0.7028640111287435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,128,1,float16,fp8,0,0.6885120073954264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,128,1,fp8,fp8,0,0.5784266789754232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,128,1,fp8,fp8,0,0.5495359897613525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,128,1,float16,float16,0,0.7025706768035889
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,128,1,float16,fp8,0,0.6894079844156901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,128,1,fp8,fp8,0,0.5484586556752523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,128,1,float16,float16,0,0.7030826409657797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,128,1,fp8,fp8,0,0.5530933141708374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,128,1,float16,fp8,0,0.6898986498514811
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,128,1,float16,float16,0,0.7068373362223307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,128,1,float16,fp8,0,0.6922240257263184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,128,1,fp8,fp8,0,0.5561813513437907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,128,1,float16,float16,0,0.4095573425292969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,128,1,float16,fp8,0,0.4028640190760295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,128,1,float16,float16,0,0.40516801675160724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,128,1,fp8,fp8,0,0.31942399342854816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,128,1,fp8,fp8,0,0.30188800891240436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,128,1,float16,fp8,0,0.3965760072072347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,128,1,float16,float16,0,0.4031360149383545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,128,1,float16,fp8,0,0.3975253502527873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,128,1,fp8,fp8,0,0.3033600052197774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,128,1,float16,float16,0,0.4063946803410848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,128,1,float16,fp8,0,0.40034135182698566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,128,1,fp8,fp8,0,0.3047039906183879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,128,1,float16,float16,0,0.4092479944229126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,128,1,fp8,fp8,0,0.30611733595530194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,128,1,float16,fp8,0,0.4012426535288493
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,128,1,float16,fp8,0,5.997856140136719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,128,1,float16,float16,0,6.074501037597656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,128,1,fp8,fp8,0,4.731557210286458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,128,1,float16,float16,0,6.1429704030354815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,128,1,fp8,fp8,0,4.748527844746907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,128,1,float16,fp8,0,6.089402516682942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,128,1,float16,float16,0,6.245525360107422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,128,1,float16,fp8,0,6.175578435262044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,128,1,float16,float16,0,3.0086132685343423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,128,1,fp8,fp8,0,4.786069234212239
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,128,1,float16,float16,0,6.204586664835612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,128,1,float16,fp8,0,6.101338704427083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,128,1,float16,fp8,0,3.0892159144083657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,128,1,float16,float16,0,3.2042932510375977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,128,1,fp8,fp8,0,4.8803253173828125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,128,1,float16,fp8,0,2.960063934326172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,128,1,fp8,fp8,0,2.572378635406494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,128,1,fp8,fp8,0,2.385770638783773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,128,1,float16,float16,0,3.0060640970865884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,128,1,float16,fp8,0,2.9438559214274087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,128,1,fp8,fp8,0,2.3956106503804526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,128,1,float16,float16,0,3.0490185419718423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,128,1,float16,fp8,0,2.9968531926472983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,128,1,fp8,fp8,0,2.412735939025879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,128,1,float16,float16,0,3.0310239791870117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,128,1,float16,fp8,0,3.005375862121582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,128,1,float16,float16,0,1.547690709431966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,128,1,float16,fp8,0,1.5134986241658528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,128,1,fp8,fp8,0,1.2234826882680256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,128,1,fp8,fp8,0,2.440842628479004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,128,1,float16,float16,0,1.592074712117513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,128,1,float16,fp8,0,1.5813813209533691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,128,1,float16,float16,0,1.546021302541097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,128,1,fp8,fp8,0,1.3277706305185955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,128,1,float16,fp8,0,1.5212799708048503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,128,1,fp8,fp8,0,1.2311253547668457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,128,1,float16,float16,0,1.5417547225952148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,128,1,float16,fp8,0,1.5174345970153809
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,128,1,fp8,fp8,0,1.2355573177337646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,128,1,float16,float16,0,1.5536853472391765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,128,1,float16,fp8,0,1.5354347229003906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,128,1,fp8,fp8,0,1.254746675491333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,128,1,float16,float16,0,0.8124159971872965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,128,1,float16,float16,0,0.8429386615753174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,128,1,float16,fp8,0,0.7954773108164469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,128,1,float16,fp8,0,0.8310773372650146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,128,1,fp8,fp8,0,0.6546293497085571
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,128,1,float16,float16,0,0.8152799606323242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,128,1,fp8,fp8,0,0.7063999970753988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,128,1,float16,fp8,0,0.8002133369445801
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,128,1,fp8,fp8,0,0.6557173331578573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,128,1,float16,float16,0,0.8189653555552164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,128,1,float16,fp8,0,0.8010719617207845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,128,1,fp8,fp8,0,0.662277340888977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,128,1,float16,float16,0,0.8206079800923666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,128,1,float16,fp8,0,0.8082559903462728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,128,1,float16,float16,0,0.44894933700561523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,128,1,float16,fp8,0,0.440778652826945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,128,1,float16,float16,0,0.45996801058451336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,128,1,fp8,fp8,0,0.6655893325805664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,128,1,fp8,fp8,0,0.3489280144373576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,128,1,float16,fp8,0,0.4532053470611572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,128,1,fp8,fp8,0,0.3807733456293742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,128,1,float16,fp8,0,0.44093867142995197
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,128,1,float16,float16,0,0.4514506657918294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,128,1,fp8,fp8,0,0.35125335057576496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,128,1,float16,float16,0,0.4516853491465251
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,128,1,float16,fp8,0,0.44128533204396564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,128,1,fp8,fp8,0,0.35236799716949463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,128,1,float16,float16,0,0.4541813135147095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,128,1,float16,fp8,0,0.44576001167297363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,128,1,fp8,fp8,0,0.3550186554590861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,128,1,float16,fp8,0,0.2360586722691854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,128,1,float16,float16,0,0.24050666888554892
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,128,1,float16,fp8,0,0.24782399336496988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,128,1,float16,float16,0,0.25179733832677204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,128,1,fp8,fp8,0,0.2169439991315206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,128,1,fp8,fp8,0,0.2042293349901835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,128,1,float16,float16,0,0.24200532833735147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,128,1,float16,fp8,0,0.23530133565266928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,128,1,fp8,fp8,0,0.20523732900619507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,128,1,float16,float16,0,0.2421706716219584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,128,1,float16,fp8,0,0.2374346653620402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,128,1,fp8,fp8,0,0.20593067010243735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,128,1,float16,float16,0,0.24295467138290405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,128,1,float16,fp8,0,0.2374933362007141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,128,1,fp8,fp8,0,0.20899200439453125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,128,1,float16,fp8,0,6.266159693400065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,128,1,fp8,fp8,0,5.196240107218425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,128,1,float16,fp8,0,6.255322774251302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,128,1,float16,float16,0,6.387439727783203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,128,1,float16,float16,0,6.276927947998047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,128,1,float16,float16,0,6.38585090637207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,128,1,fp8,fp8,0,5.219285329182942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,128,1,float16,fp8,0,6.424191792805989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,128,1,float16,float16,0,3.1424853006998696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,128,1,fp8,fp8,0,5.254090627034505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,128,1,float16,fp8,0,3.0519307454427085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,128,1,float16,fp8,0,6.415733337402344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,128,1,float16,fp8,0,3.256805419921875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,128,1,float16,float16,0,6.489797592163086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,128,1,float16,float16,0,3.304464022318522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,128,1,fp8,fp8,0,5.323312123616536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,128,1,fp8,fp8,0,2.6095946629842124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,128,1,fp8,fp8,0,2.86463991800944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,128,1,float16,float16,0,3.1480159759521484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,128,1,float16,fp8,0,3.0497973759969077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,128,1,fp8,fp8,0,2.61734406153361
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,128,1,float16,float16,0,3.140650749206543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,128,1,float16,fp8,0,3.0545225143432617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,128,1,fp8,fp8,0,2.631903966267904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,128,1,float16,float16,0,3.167226791381836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,128,1,float16,fp8,0,3.136810620625814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,128,1,float16,float16,0,1.5695466995239258
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,128,1,float16,fp8,0,1.5343732833862305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,128,1,fp8,fp8,0,1.3221546808878581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,128,1,float16,float16,0,1.6729013125101726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,128,1,fp8,fp8,0,2.677349408467611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,128,1,float16,fp8,0,1.642192045847575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,128,1,float16,fp8,0,1.5391252835591633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,128,1,float16,float16,0,1.5801226298014324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,128,1,fp8,fp8,0,1.4580906232198079
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,128,1,fp8,fp8,0,1.3210399945576985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,128,1,float16,float16,0,1.5825546582539876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,128,1,float16,fp8,0,1.5506614049275715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,128,1,fp8,fp8,0,1.3354239463806152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,128,1,float16,float16,0,1.5965226491292317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,128,1,float16,fp8,0,1.5612427393595378
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,128,1,float16,float16,0,0.8192799886067709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,128,1,float16,fp8,0,0.7993653615315756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,128,1,float16,float16,0,0.8552746772766113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,128,1,fp8,fp8,0,1.3544425964355469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,128,1,float16,fp8,0,0.8506186803181967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,128,1,fp8,fp8,0,0.6876479784647623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,128,1,fp8,fp8,0,0.762762705485026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,128,1,float16,float16,0,0.8196319739023844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,128,1,float16,fp8,0,0.8016106287638346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,128,1,fp8,fp8,0,0.6910773118336996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,128,1,float16,float16,0,0.8234506448109945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,128,1,float16,fp8,0,0.8080800374348959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,128,1,fp8,fp8,0,0.6981973648071289
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,128,1,float16,float16,0,0.8304959932963053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,128,1,float16,fp8,0,0.8140532970428467
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,128,1,float16,float16,0,0.46120532353719074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,128,1,fp8,fp8,0,0.7066559791564941
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,128,1,float16,float16,0,0.4410826762517293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,128,1,float16,fp8,0,0.43029332160949707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,128,1,float16,fp8,0,0.4578293164571126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,128,1,fp8,fp8,0,0.41156800587972003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,128,1,fp8,fp8,0,0.3753013213475545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,128,1,float16,float16,0,0.4424906571706136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,128,1,fp8,fp8,0,0.3739039897918701
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,128,1,float16,fp8,0,0.43297600746154785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,128,1,float16,float16,0,0.445904016494751
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,128,1,float16,fp8,0,0.4357973337173462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,128,1,fp8,fp8,0,0.3795679807662964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,128,1,float16,float16,0,0.44680531819661456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,128,1,float16,fp8,0,0.4386346737543742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,128,1,float16,float16,0,0.25086400906244916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,128,1,fp8,fp8,0,0.38413333892822266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,128,1,float16,fp8,0,0.24506133794784546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,128,1,float16,float16,0,0.2605066696802775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,128,1,float16,fp8,0,0.25752000013987225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,128,1,fp8,fp8,0,0.20307199160257974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,128,1,fp8,fp8,0,0.22362667322158813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,128,1,float16,float16,0,0.2515573302904765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,128,1,fp8,fp8,0,0.2048106590906779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,128,1,float16,fp8,0,0.24462932348251343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,128,1,float16,float16,0,0.2547146677970886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,128,1,float16,fp8,0,0.24726400772730509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,128,1,fp8,fp8,0,0.20521599054336548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,128,1,float16,float16,0,0.2549333373705546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,128,1,float16,fp8,0,0.2506879965464274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,128,1,fp8,fp8,0,0.20822399854660034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,128,1,float16,float16,0,0.13709333539009094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,128,1,float16,fp8,0,0.13506133357683817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,128,1,float16,float16,0,0.14477866888046265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,128,1,float16,fp8,0,0.1431839962800344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,128,1,fp8,fp8,0,0.13460800051689148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,128,1,fp8,fp8,0,0.12035199999809265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,128,1,float16,float16,0,0.13662933309872946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,128,1,float16,fp8,0,0.13518399993578592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,128,1,fp8,fp8,0,0.12275733550389607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,128,1,float16,fp8,0,0.13552000125249228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,128,1,fp8,fp8,0,0.12378666798273723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,128,1,float16,float16,0,0.13844266533851624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,128,1,float16,float16,0,0.1397226651509603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,128,1,float16,fp8,0,0.13581333557764688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,128,1,fp8,fp8,0,0.12680533528327942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,128,1,float16,float16,0,4.072389284769694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,128,1,fp8,fp8,0,3.489717483520508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,128,1,float16,fp8,0,3.890357335408529
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,128,1,fp8,fp8,0,3.500298817952474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,128,1,float16,fp8,0,3.9071572621663413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,128,1,float16,float16,0,4.0859785079956055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,128,1,float16,float16,0,4.109861373901367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,128,1,float16,fp8,0,3.9363253911336265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,128,1,float16,float16,0,2.017829259236654
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,128,1,fp8,fp8,0,3.5238399505615234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,128,1,float16,float16,0,4.15334415435791
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,128,1,float16,float16,0,2.184661388397217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,128,1,float16,fp8,0,1.9558293024698894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,128,1,float16,fp8,0,3.9570401509602866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,128,1,float16,fp8,0,2.119765281677246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,128,1,fp8,fp8,0,1.9490292867024739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,128,1,fp8,fp8,0,1.7457973162333171
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,128,1,fp8,fp8,0,3.587087949117025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,128,1,float16,float16,0,2.0212693214416504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,128,1,float16,fp8,0,1.9596320788065593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,128,1,fp8,fp8,0,1.7591733932495117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,128,1,float16,float16,0,2.028223991394043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,128,1,float16,fp8,0,1.9680533409118652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,128,1,fp8,fp8,0,1.770576000213623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,128,1,float16,float16,0,2.055258591969808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,128,1,float16,float16,0,1.0229706764221191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,128,1,float16,fp8,0,0.9975146452585856
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,128,1,float16,fp8,0,1.9990347226460774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,128,1,fp8,fp8,0,1.7983999252319336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,128,1,float16,float16,0,1.0982399781545003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,128,1,fp8,fp8,0,0.8910026550292969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,128,1,float16,float16,0,1.0296693642934163
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,128,1,float16,fp8,0,1.0869119962056477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,128,1,float16,fp8,0,0.9997546672821045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,128,1,fp8,fp8,0,0.9975306987762451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,128,1,fp8,fp8,0,0.8970133463541666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,128,1,float16,fp8,0,1.0086133480072021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,128,1,float16,float16,0,1.0320053100585938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,128,1,fp8,fp8,0,0.9023199876149496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,128,1,float16,float16,0,1.0416106383005779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,128,1,fp8,fp8,0,0.9158559640248617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,128,1,float16,fp8,0,1.020410696665446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,128,1,float16,float16,0,0.5400906801223755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,128,1,float16,fp8,0,0.5254559914271036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,128,1,float16,float16,0,0.5706666707992554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,128,1,float16,fp8,0,0.5664746761322021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,128,1,fp8,fp8,0,0.4724746545155843
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,128,1,fp8,fp8,0,0.5254079898198446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,128,1,float16,float16,0,0.5401440064112345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,128,1,fp8,fp8,0,0.47259199619293213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,128,1,float16,fp8,0,0.527567982673645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,128,1,float16,float16,0,0.5433973471323649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,128,1,float16,fp8,0,0.5296106735865275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,128,1,fp8,fp8,0,0.47723201910654706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,128,1,float16,float16,0,0.5477333466211954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,128,1,fp8,fp8,0,0.484554648399353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,128,1,float16,fp8,0,0.5367573499679565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,128,1,float16,float16,0,0.31112533807754517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,128,1,fp8,fp8,0,0.28650132815043133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,128,1,float16,fp8,0,0.30952000617980957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,128,1,float16,float16,0,0.2944800059000651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,128,1,fp8,fp8,0,0.25150932868321735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,128,1,float16,fp8,0,0.2877333362897237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,128,1,float16,float16,0,0.29581334193547565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,128,1,float16,fp8,0,0.28958932558695477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,128,1,fp8,fp8,0,0.25219200054804486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,128,1,float16,float16,0,0.29796799023946124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,128,1,float16,float16,0,0.3004480004310608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,128,1,float16,fp8,0,0.2953813274701436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,128,1,float16,fp8,0,0.2911840081214905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,128,1,fp8,fp8,0,0.2573866645495097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,128,1,float16,float16,0,0.1744906703631083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,128,1,float16,fp8,0,0.17126933733622232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,128,1,fp8,fp8,0,0.2529226740201314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,128,1,fp8,fp8,0,0.1588266690572103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,128,1,float16,float16,0,0.15934933225313822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,128,1,float16,fp8,0,0.15614933768908182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,128,1,float16,float16,0,0.15958933035532633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,128,1,float16,fp8,0,0.1562986671924591
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,128,1,fp8,fp8,0,0.14471999804178873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,128,1,fp8,fp8,0,0.14379200339317322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,128,1,float16,fp8,0,0.15685866276423135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,128,1,fp8,fp8,0,0.14546666542689005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,128,1,float16,float16,0,0.16054933269818625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,128,1,float16,fp8,0,0.157151997089386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,128,1,fp8,fp8,0,0.1474240024884542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,128,1,float16,float16,0,0.10296533505121867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,128,1,float16,float16,0,0.16294933358828226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,128,1,float16,float16,0,0.09682133793830872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,128,1,fp8,fp8,0,0.09935999910036723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,128,1,float16,fp8,0,0.09517332911491394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,128,1,float16,fp8,0,0.1018453339735667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,128,1,fp8,fp8,0,0.08756267031033833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,128,1,float16,fp8,0,0.09532266855239868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,128,1,float16,float16,0,0.09662933150927226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,128,1,float16,float16,0,0.09717866778373718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,128,1,float16,fp8,0,0.09490133325258891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,128,1,fp8,fp8,0,0.08830400307973225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,128,1,fp8,fp8,0,0.08887466788291931
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,128,1,float16,fp8,0,0.0960746705532074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,128,1,fp8,fp8,0,0.09111467003822327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,128,1,float16,float16,0,0.09868799646695454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,128,1,float16,fp8,0,3.8324000040690103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,128,1,float16,fp8,0,3.8633174896240234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,128,1,float16,float16,0,3.8918612798055015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,128,1,float16,float16,0,3.843258539835612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,128,1,fp8,fp8,0,3.5663038889567056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,128,1,float16,float16,0,3.941157341003418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,128,1,fp8,fp8,0,3.4507201512654624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,128,1,float16,fp8,0,4.001706759134929
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,128,1,float16,float16,0,1.904703934987386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,128,1,fp8,fp8,0,3.5848747889200845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,128,1,float16,float16,0,4.027557373046875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,128,1,float16,float16,0,2.2501279513041177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,128,1,float16,fp8,0,2.2110026677449546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,128,1,fp8,fp8,0,3.7298240661621094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,128,1,float16,fp8,0,1.9027892748514812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,128,1,fp8,fp8,0,2.004074732462565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,128,1,fp8,fp8,0,1.711114724477132
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,128,1,float16,fp8,0,4.115850766499837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,128,1,float16,float16,0,1.927669366200765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,128,1,float16,fp8,0,1.940608024597168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,128,1,fp8,fp8,0,1.7735466957092285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,128,1,float16,float16,0,1.9541385968526204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,128,1,float16,fp8,0,1.959994633992513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,128,1,fp8,fp8,0,1.7983412742614746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,128,1,float16,float16,0,2.006901264190674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,128,1,float16,fp8,0,2.0374719301859536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,128,1,float16,float16,0,0.9692853291829427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,128,1,float16,fp8,0,0.9662133057912191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,128,1,fp8,fp8,0,1.867263952891032
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,128,1,float16,float16,0,1.1367093722025554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,128,1,fp8,fp8,0,0.8608853022257487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,128,1,float16,fp8,0,1.1178452968597412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,128,1,fp8,fp8,0,1.0107839902242024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,128,1,fp8,fp8,0,0.9041333198547363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,128,1,float16,fp8,0,0.9822186628977457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,128,1,float16,float16,0,0.9782719612121582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,128,1,float16,fp8,0,0.9897066752115885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,128,1,fp8,fp8,0,0.9082346757253011
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,128,1,float16,float16,0,0.9844746589660645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,128,1,float16,float16,0,1.0025333563486736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,128,1,float16,float16,0,0.5002293189366659
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,128,1,float16,fp8,0,1.0112160046895344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,128,1,fp8,fp8,0,0.942074696222941
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,128,1,float16,fp8,0,0.5006080071131388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,128,1,fp8,fp8,0,0.4398719867070516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,128,1,float16,fp8,0,0.5653653144836426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,128,1,float16,float16,0,0.5796586672465006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,128,1,float16,float16,0,0.5041759808858236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,128,1,float16,fp8,0,0.508458654085795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,128,1,fp8,fp8,0,0.45609601338704425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,128,1,fp8,fp8,0,0.5114293495814005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,128,1,float16,fp8,0,0.510474681854248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,128,1,fp8,fp8,0,0.4645226796468099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,128,1,float16,float16,0,0.5064533154169718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,128,1,float16,float16,0,0.5161120096842448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,128,1,float16,fp8,0,0.5164373318354288
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,128,1,float16,float16,0,0.30291199684143066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,128,1,float16,fp8,0,0.264357328414917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,128,1,fp8,fp8,0,0.4837546745936076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,128,1,float16,fp8,0,0.2946293354034424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,128,1,fp8,fp8,0,0.2338506579399109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,128,1,float16,float16,0,0.2661386728286743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,128,1,fp8,fp8,0,0.26825066407521564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,128,1,float16,float16,0,0.266159991423289
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,128,1,float16,fp8,0,0.26766399542490643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,128,1,float16,float16,0,0.26878400643666583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,128,1,float16,fp8,0,0.2691093285878499
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,128,1,fp8,fp8,0,0.24099733432133993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,128,1,fp8,fp8,0,0.24260266621907553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,128,1,float16,fp8,0,0.27245332797368366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,128,1,float16,float16,0,0.2727946639060974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,128,1,fp8,fp8,0,0.25135467449824017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,128,1,float16,fp8,0,0.14522666732470194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,128,1,float16,float16,0,0.1657919983069102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,128,1,fp8,fp8,0,0.12271466851234436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,128,1,float16,float16,0,0.14448533455530801
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,128,1,fp8,fp8,0,0.13929067055384317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,128,1,float16,fp8,0,0.16264533003171286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,128,1,float16,float16,0,0.14645333091417947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,128,1,float16,fp8,0,0.1458080013593038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,128,1,float16,float16,0,0.14849600195884705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,128,1,fp8,fp8,0,0.1260533332824707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,128,1,float16,float16,0,0.15027733643849692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,128,1,fp8,fp8,0,0.12543466687202454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,128,1,float16,fp8,0,0.1507253348827362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,128,1,float16,fp8,0,0.14867732922236124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,128,1,float16,float16,0,0.08896000186602275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,128,1,float16,float16,0,0.07656533519426982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,128,1,float16,fp8,0,0.07713066538174947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,128,1,fp8,fp8,0,0.1297920048236847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,128,1,fp8,fp8,0,0.06762666503588359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,128,1,float16,fp8,0,0.08752533793449402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,128,1,fp8,fp8,0,0.07993066807587941
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,128,1,float16,float16,0,0.0763679991165797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,128,1,float16,fp8,0,0.0772213339805603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,128,1,fp8,fp8,0,0.0680320014556249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,128,1,fp8,fp8,0,0.0689386675755183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,128,1,float16,float16,0,0.07727999985218048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,128,1,float16,float16,0,0.07962133487065633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,128,1,float16,fp8,0,0.07805866499741872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,128,1,float16,fp8,0,0.07928533355395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,128,1,float16,float16,0,0.050944000482559204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,128,1,float16,fp8,0,0.04970666766166687
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,128,1,fp8,fp8,0,0.07294933497905731
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,128,1,float16,fp8,0,0.04540266593297323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,128,1,float16,float16,0,0.04549333453178406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,128,1,fp8,fp8,0,0.04842666784922282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,128,1,fp8,fp8,0,0.041802664597829185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,128,1,float16,float16,0,0.045706664522488914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,128,1,float16,fp8,0,0.045754666129748024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,128,1,fp8,fp8,0,0.04154133299986521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,128,1,float16,fp8,0,0.04535466432571411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,128,1,fp8,fp8,0,0.04237333436806997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,128,1,float16,float16,0,0.04619200030962626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,128,1,float16,float16,0,0.045466666420300804
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,128,1,fp8,fp8,0,0.04340800146261851
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,128,1,float16,fp8,0,0.046165332198143005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,1,128,1,float16,float16,0,2.991135915120443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,1,128,1,fp8,fp8,0,2.858938535054525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,1,128,1,float16,fp8,0,2.9453118642171225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,2,128,1,float16,fp8,0,3.0160961151123047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,2,128,1,float16,float16,0,3.0263681411743164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,4,128,1,float16,float16,0,3.0675573348999023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,4,128,1,float16,fp8,0,3.1147839228312173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,2,128,1,fp8,fp8,0,2.9653971989949546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,1,128,1,float16,float16,0,1.4763840039571126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,4,128,1,fp8,fp8,0,2.986175855000814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,8,128,1,float16,float16,0,3.211968104044596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,8,128,1,float16,fp8,0,3.2103894551595054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,1,128,1,float16,fp8,0,1.4742132822672527
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,32,8,128,1,fp8,fp8,0,3.1322453816731772
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,32,128,1,float16,float16,0,1.8493812878926594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,32,128,1,float16,fp8,0,1.7952639261881511
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,1,128,1,fp8,fp8,0,1.417103926340739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,32,128,1,fp8,fp8,0,1.7083199818929036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,2,128,1,float16,float16,0,1.5021227200826008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,2,128,1,float16,fp8,0,1.502570629119873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,4,128,1,float16,float16,0,1.5255573590596516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,4,128,1,float16,fp8,0,1.5399360656738281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,2,128,1,fp8,fp8,0,1.4777812957763672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,4,128,1,fp8,fp8,0,1.5044053395589192
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,8,128,1,float16,float16,0,1.5722773869832356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,8,128,1,float16,fp8,0,1.606239954630534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,1,128,1,float16,float16,0,0.7512426376342773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,1,128,1,float16,fp8,0,0.7521866957346598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,1,128,1,fp8,fp8,0,0.7070559660593668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,32,8,128,1,fp8,fp8,0,1.5708853403727214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,32,128,1,float16,float16,0,0.9324906667073568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,2,128,1,float16,float16,0,0.7605546315511068
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,2,128,1,fp8,fp8,0,0.7436052958170573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,32,128,1,float16,fp8,0,0.9047306378682455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,32,128,1,fp8,fp8,0,0.8570346832275391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,2,128,1,float16,fp8,0,0.7649439970652262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,4,128,1,float16,float16,0,0.7682720025380453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,4,128,1,float16,fp8,0,0.7777493000030518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,4,128,1,fp8,fp8,0,0.7565546830495199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,1,128,1,float16,float16,0,0.3872640132904053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,8,128,1,float16,fp8,0,0.7903467019399008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,8,128,1,fp8,fp8,0,0.7880533536275228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,32,128,1,float16,float16,0,0.46862932046254474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,32,8,128,1,float16,float16,0,0.7868320147196451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,1,128,1,float16,fp8,0,0.38632531960805255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,32,128,1,float16,fp8,0,0.45841066042582196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,1,128,1,fp8,fp8,0,0.36345601081848145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,32,128,1,fp8,fp8,0,0.4357120196024577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,2,128,1,fp8,fp8,0,0.377893328666687
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,2,128,1,float16,fp8,0,0.3932853142420451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,2,128,1,float16,float16,0,0.3909120162328084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,4,128,1,float16,fp8,0,0.3972586790720622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,4,128,1,float16,float16,0,0.396506667137146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,8,128,1,float16,float16,0,0.4048853317896525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,8,128,1,float16,fp8,0,0.40533332029978436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,4,128,1,fp8,fp8,0,0.3824479977289836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,32,8,128,1,fp8,fp8,0,0.40356798966725665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,32,128,1,float16,float16,0,0.24800533056259155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,1,128,1,float16,float16,0,0.20769067605336508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,32,128,1,fp8,fp8,0,0.22766399383544922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,32,128,1,float16,fp8,0,0.24097599585851034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,1,128,1,float16,fp8,0,0.20708799362182617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,1,128,1,fp8,fp8,0,0.19336533546447754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,2,128,1,float16,fp8,0,0.20668800671895346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,2,128,1,fp8,fp8,0,0.2000746726989746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,4,128,1,float16,fp8,0,0.20989867051442465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,2,128,1,float16,float16,0,0.206821342309316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,4,128,1,fp8,fp8,0,0.20169599850972494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,4,128,1,float16,float16,0,0.2101866602897644
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,8,128,1,float16,float16,0,0.21559999386469522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,8,128,1,float16,fp8,0,0.21409066518147787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,32,8,128,1,fp8,fp8,0,0.2098346749941508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,32,128,1,float16,float16,0,0.13514133294423422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,32,128,1,float16,fp8,0,0.13146133224169412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,1,128,1,float16,fp8,0,0.11355200409889221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,1,128,1,float16,float16,0,0.11326400438944499
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,32,128,1,fp8,fp8,0,0.12238933642705281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,1,128,1,fp8,fp8,0,0.10623466968536377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,2,128,1,float16,float16,0,0.11404800415039062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,2,128,1,fp8,fp8,0,0.10884267091751099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,2,128,1,float16,fp8,0,0.11348799864451091
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,4,128,1,float16,float16,0,0.11518399914105733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,4,128,1,fp8,fp8,0,0.10930132865905762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,8,128,1,float16,fp8,0,0.1174773375193278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,8,128,1,fp8,fp8,0,0.11386666695276897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,4,128,1,float16,fp8,0,0.11609599987665813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,32,128,1,float16,float16,0,0.07634133100509644
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,32,128,1,fp8,fp8,0,0.07074666519959767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,32,8,128,1,float16,float16,0,0.11854933698972066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,32,128,1,float16,fp8,0,0.07530666887760162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,1,128,1,float16,fp8,0,0.0637066662311554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,1,128,1,fp8,fp8,0,0.05798399945100149
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,1,128,1,float16,float16,0,0.06332266827424367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,4,128,1,float16,fp8,0,0.0645653357108434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,2,128,1,float16,fp8,0,0.06324266890684764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,2,128,1,fp8,fp8,0,0.05948266883691152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,4,128,1,float16,float16,0,0.06478933493296306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,2,128,1,float16,float16,0,0.0645546664794286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,4,128,1,fp8,fp8,0,0.0612960010766983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,8,128,1,float16,float16,0,0.06588266789913177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,8,128,1,fp8,fp8,0,0.06429333488146464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,32,8,128,1,float16,fp8,0,0.06681066751480103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,32,128,1,float16,float16,0,0.04423999786376953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,32,128,1,float16,fp8,0,0.04350399971008301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,32,128,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,1,128,1,float16,fp8,0,0.03884266565243403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,1,128,1,float16,float16,0,0.03905600061019262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,2,128,1,float16,float16,0,0.0391893337170283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,1,128,1,fp8,fp8,0,0.035989334185918175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,2,128,1,fp8,fp8,0,0.035904000202814736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,4,128,1,float16,float16,0,0.03908266623814901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,2,128,1,float16,fp8,0,0.03885333240032196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,4,128,1,fp8,fp8,0,0.036357333262761436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,8,128,1,float16,fp8,0,0.0399893323580424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,8,128,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,8,128,1,float16,float16,0,0.03972800076007843
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,32,128,1,float16,fp8,0,0.030245333909988403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,32,128,1,float16,float16,0,0.030181333422660828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,32,4,128,1,float16,fp8,0,0.03937066594759623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,1,128,1,float16,float16,0,0.027445333699385326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,1,128,1,float16,fp8,0,0.028229333460330963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,1,128,1,fp8,fp8,0,0.026602665583292644
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,32,128,1,fp8,fp8,0,0.028042666614055634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,2,128,1,float16,fp8,0,0.02812266598145167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,2,128,1,fp8,fp8,0,0.026394667724768322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,2,128,1,float16,float16,0,0.027776000400384266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,4,128,1,float16,fp8,0,0.028005334238211315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,4,128,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,8,128,1,float16,float16,0,0.02870933214823405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,8,128,1,fp8,fp8,0,0.028143999477227528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,4,128,1,float16,float16,0,0.028565332293510437
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,32,8,128,1,float16,fp8,0,0.02847466617822647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,1,128,1,fp8,fp8,0,1.231002648671468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,1,128,1,float16,fp8,0,1.2630720138549805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,1,128,1,float16,float16,0,1.2628106276194255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,2,128,1,fp8,fp8,0,1.2783146699269612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,2,128,1,float16,fp8,0,1.285871982574463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,2,128,1,float16,float16,0,1.2842186292012532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,4,128,1,float16,float16,0,1.3008586565653484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,4,128,1,float16,fp8,0,1.3242560227711995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,4,128,1,fp8,fp8,0,1.3224159876505535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,1,128,1,float16,float16,0,0.6410986582438151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,8,128,1,float16,fp8,0,1.3794293403625488
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,32,128,1,float16,float16,0,0.8263786633809408
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,8,128,1,float16,float16,0,1.3483840624491374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,32,8,128,1,fp8,fp8,0,1.383306662241618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,32,128,1,float16,fp8,0,0.7959679762522379
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,1,128,1,float16,fp8,0,0.6406399806340536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,32,128,1,fp8,fp8,0,0.7749120394388834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,1,128,1,fp8,fp8,0,0.6166346470514933
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,2,128,1,float16,float16,0,0.6512106657028198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,2,128,1,float16,fp8,0,0.6557279825210571
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,2,128,1,fp8,fp8,0,0.6470773220062256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,4,128,1,float16,fp8,0,0.6594186623891195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,4,128,1,float16,float16,0,0.6574720144271851
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,4,128,1,fp8,fp8,0,0.6620213190714518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,1,128,1,float16,float16,0,0.33035733302434284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,8,128,1,float16,float16,0,0.6726986567179362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,1,128,1,float16,fp8,0,0.3311520020167033
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,8,128,1,float16,fp8,0,0.6795893510182699
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,32,128,1,float16,float16,0,0.41793068250020343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,32,8,128,1,fp8,fp8,0,0.6984106699625651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,32,128,1,float16,fp8,0,0.40165865421295166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,1,128,1,fp8,fp8,0,0.31725867589314777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,32,128,1,fp8,fp8,0,0.3940533399581909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,2,128,1,float16,float16,0,0.33452268441518146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,2,128,1,float16,fp8,0,0.3382186492284139
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,4,128,1,float16,float16,0,0.33795734246571857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,4,128,1,float16,fp8,0,0.34138135115305585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,2,128,1,fp8,fp8,0,0.33080534140268963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,4,128,1,fp8,fp8,0,0.3378026485443115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,8,128,1,float16,float16,0,0.34594134489695233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,8,128,1,float16,fp8,0,0.34859732786814374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,1,128,1,float16,fp8,0,0.17710399627685547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,1,128,1,float16,float16,0,0.17648533980051676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,1,128,1,fp8,fp8,0,0.1709280014038086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,2,128,1,float16,float16,0,0.17735999822616577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,32,8,128,1,fp8,fp8,0,0.3574879964192708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,32,128,1,float16,float16,0,0.21916800737380981
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,32,128,1,float16,fp8,0,0.212336003780365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,32,128,1,fp8,fp8,0,0.20734399557113647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,2,128,1,float16,fp8,0,0.17806400855382284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,4,128,1,float16,float16,0,0.17941866318384805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,2,128,1,fp8,fp8,0,0.17636799812316895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,4,128,1,float16,fp8,0,0.17986132701237997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,8,128,1,float16,float16,0,0.18563199043273926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,8,128,1,float16,fp8,0,0.18464533487955728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,8,128,1,fp8,fp8,0,0.18769599994023642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,32,4,128,1,fp8,fp8,0,0.1798293391863505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,32,128,1,float16,float16,0,0.12136000394821167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,1,128,1,float16,float16,0,0.09763200084368388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,1,128,1,float16,fp8,0,0.09710933764775594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,1,128,1,fp8,fp8,0,0.096261332432429
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,2,128,1,float16,float16,0,0.09789333740870158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,2,128,1,float16,fp8,0,0.09797867139180501
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,32,128,1,float16,fp8,0,0.11844266454378764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,32,128,1,fp8,fp8,0,0.11437867085138957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,2,128,1,fp8,fp8,0,0.09774399797121684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,4,128,1,float16,fp8,0,0.09992000460624695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,4,128,1,float16,float16,0,0.09891200065612793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,4,128,1,fp8,fp8,0,0.09928533434867859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,8,128,1,fp8,fp8,0,0.10241066416104634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,8,128,1,float16,float16,0,0.10120532910029094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,32,128,1,float16,float16,0,0.07437866429487865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,1,128,1,float16,fp8,0,0.057114665706952415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,32,8,128,1,float16,fp8,0,0.10180266698201497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,1,128,1,float16,float16,0,0.05685866872469584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,32,128,1,float16,fp8,0,0.07148799796899159
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,1,128,1,fp8,fp8,0,0.05372266471385956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,32,128,1,fp8,fp8,0,0.06654400130112965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,2,128,1,float16,float16,0,0.05748266478379568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,2,128,1,float16,fp8,0,0.05764266848564148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,2,128,1,fp8,fp8,0,0.05455466608206431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,4,128,1,float16,fp8,0,0.058821335434913635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,4,128,1,fp8,fp8,0,0.05486933390299479
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,8,128,1,float16,fp8,0,0.06020266811052958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,8,128,1,float16,float16,0,0.059343998630841575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,4,128,1,float16,float16,0,0.05783999959627787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,32,8,128,1,fp8,fp8,0,0.059263999263445534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,32,128,1,float16,fp8,0,0.04109866668780645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,32,128,1,float16,float16,0,0.0405973345041275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,1,128,1,float16,float16,0,0.036202666660149894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,1,128,1,float16,fp8,0,0.03613866617282232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,32,128,1,fp8,fp8,0,0.039349332451820374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,1,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,2,128,1,float16,fp8,0,0.03612799942493439
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,2,128,1,float16,float16,0,0.03589866558710734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,2,128,1,fp8,fp8,0,0.03326933334271113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,4,128,1,float16,fp8,0,0.03610666592915853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,4,128,1,fp8,fp8,0,0.033728001018365227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,8,128,1,float16,float16,0,0.0367999995748202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,8,128,1,float16,fp8,0,0.036933332681655884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,8,128,1,fp8,fp8,0,0.035258665680885315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,32,4,128,1,float16,float16,0,0.03660800059636434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,32,128,1,float16,float16,0,0.026389333109060924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,32,128,1,float16,fp8,0,0.02716800073782603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,1,128,1,float16,float16,0,0.024933333198229473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,32,128,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,1,128,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,1,128,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,2,128,1,float16,float16,0,0.02495466669400533
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,2,128,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,4,128,1,float16,float16,0,0.025781333446502686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,8,128,1,float16,float16,0,0.025648000339667004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,4,128,1,fp8,fp8,0,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,2,128,1,fp8,fp8,0,0.0229066660006841
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,8,128,1,fp8,fp8,0,0.024826665719350178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,4,128,1,float16,fp8,0,0.025663999219735462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,32,128,1,float16,float16,0,0.022522665560245514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,32,128,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,32,8,128,1,float16,fp8,0,0.025920001169045765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,1,128,1,float16,float16,0,0.02201066662867864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,32,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,1,128,1,fp8,fp8,0,0.020469332734743755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,2,128,1,float16,float16,0,0.021664001047611237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,2,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,1,128,1,float16,fp8,0,0.021669333179791767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,2,128,1,float16,fp8,0,0.02179733415444692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,4,128,1,float16,fp8,0,0.022309333086013794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,4,128,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,8,128,1,float16,float16,0,0.02216533323129018
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,8,128,1,fp8,fp8,0,0.02146133283774058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,8,128,1,float16,fp8,0,0.022117334107557934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,32,4,128,1,float16,float16,0,0.024245334168275196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,1,128,1,float16,float16,0,0.5860000054041544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,1,128,1,float16,fp8,0,0.5861866474151611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,2,128,1,fp8,fp8,0,0.6485280195871989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,2,128,1,float16,float16,0,0.5981653531392416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,4,128,1,float16,float16,0,0.6079893509546915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,2,128,1,float16,fp8,0,0.6020106474558512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,1,128,1,fp8,fp8,0,0.6147040128707886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,4,128,1,float16,fp8,0,0.6090666850407919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,1,128,1,float16,float16,0,0.3039413293202718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,4,128,1,fp8,fp8,0,0.6627200047175089
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,8,128,1,float16,fp8,0,0.6290666659673055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,32,128,1,float16,float16,0,0.4028799931208293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,1,128,1,float16,fp8,0,0.3028800090154012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,32,128,1,fp8,fp8,0,0.39349865913391113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,8,128,1,float16,float16,0,0.6254186630249023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,32,8,128,1,fp8,fp8,0,0.6944746971130371
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,32,128,1,float16,fp8,0,0.3882506688435872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,1,128,1,fp8,fp8,0,0.31542932987213135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,2,128,1,float16,float16,0,0.3086880048116048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,2,128,1,float16,fp8,0,0.3091040054957072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,2,128,1,fp8,fp8,0,0.3288000027338664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,4,128,1,float16,float16,0,0.3132479985555013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,4,128,1,fp8,fp8,0,0.33593066533406574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,8,128,1,float16,float16,0,0.3224266568819682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,4,128,1,float16,fp8,0,0.3159839908281962
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,8,128,1,float16,fp8,0,0.3221759994824727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,1,128,1,float16,float16,0,0.16457600394884744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,1,128,1,float16,fp8,0,0.16450132926305136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,32,8,128,1,fp8,fp8,0,0.3556160132090251
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,32,128,1,float16,fp8,0,0.20967467625935873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,1,128,1,fp8,fp8,0,0.17071467638015747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,32,128,1,float16,float16,0,0.21951999266942343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,32,128,1,fp8,fp8,0,0.2074026664098104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,2,128,1,float16,float16,0,0.16570666432380676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,2,128,1,float16,fp8,0,0.16699733336766562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,2,128,1,fp8,fp8,0,0.17617066701253256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,4,128,1,float16,fp8,0,0.16781334082285562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,4,128,1,float16,float16,0,0.16841065883636475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,8,128,1,float16,float16,0,0.1738719940185547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,4,128,1,fp8,fp8,0,0.1795253356297811
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,8,128,1,float16,fp8,0,0.17377066612243652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,32,128,1,float16,float16,0,0.12250666817029317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,32,8,128,1,fp8,fp8,0,0.18622400363286337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,1,128,1,float16,float16,0,0.09241599837938945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,32,128,1,float16,fp8,0,0.11784533659617107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,1,128,1,float16,fp8,0,0.09193600217501323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,32,128,1,fp8,fp8,0,0.11306666334470113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,2,128,1,float16,float16,0,0.0929813285668691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,2,128,1,float16,fp8,0,0.09212266405423482
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,1,128,1,fp8,fp8,0,0.0953439970811208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,4,128,1,float16,float16,0,0.09353599945704143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,4,128,1,float16,fp8,0,0.09307199716567993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,2,128,1,fp8,fp8,0,0.0965333382288615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,4,128,1,fp8,fp8,0,0.09832533200581868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,8,128,1,float16,float16,0,0.09634666641553243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,8,128,1,float16,fp8,0,0.09668800234794617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,32,8,128,1,fp8,fp8,0,0.1021066705385844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,32,128,1,float16,fp8,0,0.06857066849867503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,1,128,1,float16,fp8,0,0.05341866612434387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,1,128,1,float16,float16,0,0.05373333394527435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,1,128,1,fp8,fp8,0,0.05411200225353241
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,32,128,1,fp8,fp8,0,0.06644266843795776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,32,128,1,float16,float16,0,0.07076266904671986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,2,128,1,float16,fp8,0,0.05448000133037567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,2,128,1,fp8,fp8,0,0.05426133175690969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,2,128,1,float16,float16,0,0.05475200215975443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,4,128,1,float16,float16,0,0.05417066812515259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,8,128,1,float16,float16,0,0.05624000231424967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,4,128,1,fp8,fp8,0,0.05569066603978475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,8,128,1,float16,fp8,0,0.05635733405749003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,8,128,1,fp8,fp8,0,0.05869866907596588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,32,128,1,float16,float16,0,0.03909866760174433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,32,4,128,1,float16,fp8,0,0.05468266705671946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,32,128,1,float16,fp8,0,0.03885866701602936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,32,128,1,fp8,fp8,0,0.03878933439652125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,1,128,1,float16,float16,0,0.0345920001467069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,1,128,1,float16,fp8,0,0.03485333422819773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,1,128,1,fp8,fp8,0,0.03323733309904734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,2,128,1,float16,fp8,0,0.035061334570248924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,2,128,1,float16,float16,0,0.03481066723664602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,4,128,1,float16,float16,0,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,2,128,1,fp8,fp8,0,0.05096533397833506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,4,128,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,4,128,1,float16,fp8,0,0.0348693331082662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,8,128,1,fp8,fp8,0,0.03493333359559377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,8,128,1,float16,fp8,0,0.03562666724125544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,32,128,1,float16,float16,0,0.025834667185942333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,1,128,1,float16,float16,0,0.024362665911515553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,32,128,1,float16,fp8,0,0.02573866645495097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,1,128,1,float16,fp8,0,0.024277334411938984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,32,8,128,1,float16,float16,0,0.03512533257404963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,1,128,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,32,128,1,fp8,fp8,0,0.024986666937669117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,2,128,1,float16,float16,0,0.024085332949956257
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,2,128,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,2,128,1,float16,fp8,0,0.02442666639884313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,4,128,1,float16,float16,0,0.02476799984773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,4,128,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,4,128,1,fp8,fp8,0,0.02386133372783661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,8,128,1,float16,float16,0,0.024613333245118458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,32,128,1,float16,float16,0,0.02086399992307027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,8,128,1,float16,fp8,0,0.04623466730117798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,32,8,128,1,fp8,fp8,0,0.02463999887307485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,32,128,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,32,128,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,1,128,1,float16,float16,0,0.02060266708334287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,1,128,1,fp8,fp8,0,0.020527999848127365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,1,128,1,float16,fp8,0,0.020634666085243225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,2,128,1,float16,float16,0,0.020309332758188248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,4,128,1,float16,float16,0,0.020687999824682873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,2,128,1,fp8,fp8,0,0.020560000091791153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,4,128,1,float16,fp8,0,0.02060266708334287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,4,128,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,2,128,1,float16,fp8,0,0.02063999945918719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,8,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,8,128,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,32,8,128,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,32,128,1,float16,float16,0,0.01930133377512296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,32,128,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,32,128,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,1,128,1,float16,float16,0,0.019066666563351948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,1,128,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,2,128,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,1,128,1,float16,fp8,0,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,2,128,1,fp8,fp8,0,0.01966933285196622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,2,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,4,128,1,float16,float16,0,0.019738666713237762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,4,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,4,128,1,float16,fp8,0,0.01970133309563001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,8,128,1,float16,float16,0,0.019637333850065868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,8,128,1,float16,fp8,0,0.019706666469573975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,32,8,128,1,fp8,fp8,0,0.020527999848127365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,1,128,1,float16,float16,0,0.38228265444437665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,1,128,1,fp8,fp8,0,0.4193546772003174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,2,128,1,float16,float16,0,0.38655467828114826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,1,128,1,float16,fp8,0,0.3822400172551473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,2,128,1,fp8,fp8,0,0.43461334705352783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,2,128,1,float16,fp8,0,0.38874133427937824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,4,128,1,float16,float16,0,0.39077866077423096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,4,128,1,float16,fp8,0,0.3920053243637085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,4,128,1,fp8,fp8,0,0.44623998800913495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,8,128,1,float16,float16,0,0.4018719991048177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,32,128,1,float16,float16,0,0.23432532946268717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,8,128,1,float16,fp8,0,0.3999413251876831
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,1,128,1,float16,float16,0,0.20150399208068848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,32,8,128,1,fp8,fp8,0,0.4665813446044922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,32,128,1,float16,fp8,0,0.22523732980092367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,32,128,1,fp8,fp8,0,0.2561013301213582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,1,128,1,float16,fp8,0,0.20232532421747842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,1,128,1,fp8,fp8,0,0.22236265738805136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,2,128,1,float16,float16,0,0.20317333936691284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,2,128,1,float16,fp8,0,0.20374933878580728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,2,128,1,fp8,fp8,0,0.22843732436498007
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,4,128,1,float16,fp8,0,0.2046026587486267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,4,128,1,float16,float16,0,0.20508799950281778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,4,128,1,fp8,fp8,0,0.232095996538798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,8,128,1,float16,float16,0,0.2104640007019043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,1,128,1,float16,fp8,0,0.11031466722488403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,8,128,1,float16,fp8,0,0.2097973426183065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,1,128,1,float16,float16,0,0.10958400368690491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,32,8,128,1,fp8,fp8,0,0.2410879929860433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,32,128,1,float16,float16,0,0.12691199779510498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,32,128,1,float16,fp8,0,0.1225386659304301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,32,128,1,fp8,fp8,0,0.1397119959195455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,1,128,1,fp8,fp8,0,0.12150933345158894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,2,128,1,float16,float16,0,0.10961600144704182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,2,128,1,float16,fp8,0,0.11028800408045451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,4,128,1,float16,fp8,0,0.11151466766993205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,2,128,1,fp8,fp8,0,0.12344533205032349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,4,128,1,fp8,fp8,0,0.12433600425720215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,4,128,1,float16,float16,0,0.11090133587519328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,8,128,1,float16,float16,0,0.11399466792742412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,8,128,1,float16,fp8,0,0.11337600151697795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,1,128,1,float16,fp8,0,0.06252266466617584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,32,8,128,1,fp8,fp8,0,0.12872533003489176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,1,128,1,float16,float16,0,0.0617439995209376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,32,128,1,float16,float16,0,0.07269333302974701
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,32,128,1,float16,fp8,0,0.07017066578070323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,1,128,1,fp8,fp8,0,0.06779199838638306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,32,128,1,fp8,fp8,0,0.07895466685295105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,2,128,1,float16,fp8,0,0.06253333389759064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,2,128,1,float16,float16,0,0.06225599845250448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,4,128,1,float16,float16,0,0.0629066675901413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,4,128,1,float16,fp8,0,0.0633546660343806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,2,128,1,fp8,fp8,0,0.068271999557813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,4,128,1,fp8,fp8,0,0.0690826674302419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,8,128,1,float16,float16,0,0.0645066648721695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,8,128,1,fp8,fp8,0,0.07300266623497009
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,32,8,128,1,float16,fp8,0,0.06484266618887584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,1,128,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,1,128,1,float16,float16,0,0.03792533278465271
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,32,128,1,float16,fp8,0,0.0406986673672994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,32,128,1,fp8,fp8,0,0.04663466910521189
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,32,128,1,float16,float16,0,0.04008533308903376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,1,128,1,fp8,fp8,0,0.04085333396991094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,2,128,1,float16,float16,0,0.038293334345022835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,2,128,1,float16,fp8,0,0.038319999972979225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,2,128,1,fp8,fp8,0,0.04033066580692927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,4,128,1,float16,float16,0,0.03828799972931544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,4,128,1,float16,fp8,0,0.0383146678407987
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,4,128,1,fp8,fp8,0,0.04203199843565623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,8,128,1,fp8,fp8,0,0.041877334316571556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,8,128,1,float16,fp8,0,0.03926933308442434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,32,8,128,1,float16,float16,0,0.03908266623814901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,32,128,1,fp8,fp8,0,0.028751999139785767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,32,128,1,float16,float16,0,0.02717333287000656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,1,128,1,float16,float16,0,0.02571200082699458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,1,128,1,float16,fp8,0,0.02629866699377696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,32,128,1,float16,fp8,0,0.026816000541051228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,1,128,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,2,128,1,float16,float16,0,0.025914666553338368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,2,128,1,fp8,fp8,0,0.027461332579453785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,4,128,1,float16,fp8,0,0.026517334083716076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,4,128,1,fp8,fp8,0,0.028186666468779247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,8,128,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,8,128,1,float16,float16,0,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,2,128,1,float16,fp8,0,0.026165333886941273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,4,128,1,float16,float16,0,0.026144000391165417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,32,8,128,1,fp8,fp8,0,0.028405333558718365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,32,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,1,128,1,float16,float16,0,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,32,128,1,float16,fp8,0,0.02075200031201045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,32,128,1,fp8,fp8,0,0.02160533269246419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,1,128,1,float16,fp8,0,0.01953599974513054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,4,128,1,float16,float16,0,0.019733333339293797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,2,128,1,float16,fp8,0,0.019776000330845516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,2,128,1,fp8,fp8,0,0.020618667205174763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,2,128,1,float16,float16,0,0.019850666324297588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,4,128,1,float16,fp8,0,0.019866666446129482
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,4,128,1,fp8,fp8,0,0.020879998803138733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,8,128,1,float16,float16,0,0.019674666225910187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,1,128,1,fp8,fp8,0,0.020549333343903225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,8,128,1,float16,fp8,0,0.020207999895016353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,32,128,1,float16,float16,0,0.01806933308641116
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,32,8,128,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,32,128,1,float16,fp8,0,0.018746666610240936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,32,128,1,fp8,fp8,0,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,1,128,1,float16,float16,0,0.018005333840847015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,1,128,1,float16,fp8,0,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,2,128,1,float16,float16,0,0.01830400029818217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,1,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,2,128,1,float16,fp8,0,0.018325333793958027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,2,128,1,fp8,fp8,0,0.019445333629846573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,4,128,1,float16,float16,0,0.018229333062966663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,4,128,1,float16,fp8,0,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,4,128,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,8,128,1,float16,float16,0,0.01858666663368543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,8,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,32,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,32,128,1,float16,fp8,0,0.017722666263580322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,2,128,1,float16,float16,0,0.017808000246683758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,1,128,1,fp8,fp8,0,0.018533332894245785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,32,128,1,fp8,fp8,0,0.01844800015290578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,32,8,128,1,fp8,fp8,0,0.019760000209013622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,1,128,1,float16,fp8,0,0.017935999979575474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,1,128,1,float16,float16,0,0.017594666530688603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,2,128,1,float16,fp8,0,0.01800000046690305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,4,128,1,float16,float16,0,0.017850667238235474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,4,128,1,float16,fp8,0,0.017845333864291508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,4,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,8,128,1,float16,fp8,0,0.018272000054518383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,2,128,1,fp8,fp8,0,0.01858666663368543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,8,128,1,float16,float16,0,0.017840000490347546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,32,8,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,1,128,1,float16,float16,0,0.26678399244944256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,1,128,1,float16,fp8,0,0.2669279972712199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,1,128,1,fp8,fp8,0,0.3405706485112508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,2,128,1,float16,float16,0,0.2708746592203776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,2,128,1,float16,fp8,0,0.2715573310852051
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,4,128,1,float16,fp8,0,0.2746933301289876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,2,128,1,fp8,fp8,0,0.3468853235244751
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,4,128,1,float16,float16,0,0.27533332506815594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,4,128,1,fp8,fp8,0,0.35020800431569415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,8,128,1,float16,float16,0,0.28328533967336017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,8,128,1,fp8,fp8,0,0.36130134264628094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,32,8,128,1,float16,fp8,0,0.28175999720891315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,32,128,1,float16,float16,0,0.1581546664237976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,1,128,1,float16,float16,0,0.14094932874043783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,1,128,1,float16,fp8,0,0.1418773333231608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,1,128,1,fp8,fp8,0,0.1818880041440328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,32,128,1,float16,fp8,0,0.15401066342989603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,32,128,1,fp8,fp8,0,0.19798400004704794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,2,128,1,float16,fp8,0,0.14270400007565817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,2,128,1,float16,float16,0,0.1420960028966268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,2,128,1,fp8,fp8,0,0.18338133891423544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,4,128,1,float16,fp8,0,0.14607999722162882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,4,128,1,float16,float16,0,0.1460693379243215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,4,128,1,fp8,fp8,0,0.184005339940389
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,8,128,1,float16,float16,0,0.14934399724006653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,8,128,1,float16,fp8,0,0.1497706671555837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,32,128,1,float16,float16,0,0.08948799967765808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,32,8,128,1,fp8,fp8,0,0.18895467122395834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,1,128,1,float16,fp8,0,0.07754133145014445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,32,128,1,float16,fp8,0,0.087226668993632
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,1,128,1,float16,float16,0,0.07681066791216533
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,32,128,1,fp8,fp8,0,0.11016000310579936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,1,128,1,fp8,fp8,0,0.09780800342559814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,2,128,1,float16,fp8,0,0.0769760012626648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,2,128,1,float16,float16,0,0.07740266621112823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,2,128,1,fp8,fp8,0,0.09913067022959392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,4,128,1,float16,float16,0,0.07784533500671387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,4,128,1,float16,fp8,0,0.07784533500671387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,4,128,1,fp8,fp8,0,0.1018293301264445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,8,128,1,float16,fp8,0,0.07962666451931
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,8,128,1,fp8,fp8,0,0.10446400443712871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,32,128,1,fp8,fp8,0,0.062224000692367554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,32,128,1,float16,fp8,0,0.0470719983180364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,32,128,1,float16,float16,0,0.04674133161703745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,32,8,128,1,float16,float16,0,0.07956266899903615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,1,128,1,float16,float16,0,0.04548799991607666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,2,128,1,float16,float16,0,0.04507199923197428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,1,128,1,fp8,fp8,0,0.05665066838264465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,2,128,1,float16,fp8,0,0.04567466676235199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,1,128,1,float16,fp8,0,0.045941332976023354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,2,128,1,fp8,fp8,0,0.05710400144259135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,4,128,1,float16,float16,0,0.04553066690762838
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,8,128,1,float16,float16,0,0.04561600089073181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,4,128,1,float16,fp8,0,0.04586133360862732
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,8,128,1,fp8,fp8,0,0.0581226646900177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,8,128,1,float16,fp8,0,0.04595733185609182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,32,128,1,float16,fp8,0,0.0301706666747729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,32,4,128,1,fp8,fp8,0,0.05773333211739858
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,32,128,1,float16,float16,0,0.030565333863099415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,32,128,1,fp8,fp8,0,0.036837334434191384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,1,128,1,float16,float16,0,0.030229332546393078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,1,128,1,fp8,fp8,0,0.03552533437808355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,2,128,1,float16,fp8,0,0.030058667063713074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,2,128,1,float16,float16,0,0.030080000559488933
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,1,128,1,float16,fp8,0,0.02996266633272171
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,4,128,1,float16,fp8,0,0.030661332110563915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,4,128,1,fp8,fp8,0,0.036533333361148834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,4,128,1,float16,float16,0,0.030858665704727173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,8,128,1,float16,float16,0,0.031018666923046112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,2,128,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,8,128,1,float16,fp8,0,0.03019733230272929
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,32,8,128,1,fp8,fp8,0,0.03681600093841553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,32,128,1,float16,fp8,0,0.022117334107557934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,1,128,1,float16,fp8,0,0.02160533269246419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,32,128,1,fp8,fp8,0,0.025749333202838898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,32,128,1,float16,float16,0,0.02221333235502243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,1,128,1,float16,float16,0,0.021909333765506744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,2,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,2,128,1,float16,fp8,0,0.02186666677395503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,1,128,1,fp8,fp8,0,0.02495466669400533
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,4,128,1,float16,float16,0,0.02164799968401591
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,4,128,1,float16,fp8,0,0.02216533323129018
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,2,128,1,fp8,fp8,0,0.02477866659561793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,8,128,1,float16,fp8,0,0.0216799999276797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,8,128,1,fp8,fp8,0,0.025013332565625507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,32,128,1,float16,fp8,0,0.01851733277241389
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,8,128,1,float16,float16,0,0.021898667017618816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,32,128,1,fp8,fp8,0,0.020282667130231857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,32,128,1,float16,float16,0,0.020687999824682873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,32,4,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,1,128,1,float16,float16,0,0.01758933315674464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,1,128,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,2,128,1,float16,float16,0,0.017887999614079792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,1,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,2,128,1,fp8,fp8,0,0.01960533360640208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,2,128,1,float16,fp8,0,0.018053332964579265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,4,128,1,fp8,fp8,0,0.01988799994190534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,8,128,1,float16,float16,0,0.017770666629076004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,8,128,1,float16,fp8,0,0.01850133389234543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,4,128,1,float16,float16,0,0.017968000223239262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,4,128,1,float16,fp8,0,0.017797333498795826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,32,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,32,8,128,1,fp8,fp8,0,0.019530666371186573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,1,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,1,128,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,32,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,2,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,32,128,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,2,128,1,fp8,fp8,0,0.018485333770513535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,1,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,2,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,4,128,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,4,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,8,128,1,float16,float16,0,0.016837333639462788
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,4,128,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,8,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,32,8,128,1,float16,fp8,0,0.0176959993938605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,32,128,1,fp8,fp8,0,0.018485333770513535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,1,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,1,128,1,float16,float16,0,0.016501333564519882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,2,128,1,float16,float16,0,0.016303999970356624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,1,128,1,fp8,fp8,0,0.018138666947682697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,32,128,1,float16,float16,0,0.01613333324591319
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,32,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,2,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,4,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,4,128,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,2,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,4,128,1,fp8,fp8,0,0.018245333184798557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,8,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,8,128,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,32,8,128,1,fp8,fp8,0,0.01841066653529803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,1,128,1,float16,float16,0,0.22724266846974692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,1,128,1,float16,fp8,0,0.2274613380432129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,1,128,1,fp8,fp8,0,0.30060267448425293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,2,128,1,float16,float16,0,0.2290239930152893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,2,128,1,fp8,fp8,0,0.30245866378148395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,2,128,1,float16,fp8,0,0.22933334112167358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,4,128,1,float16,float16,0,0.23132266600926718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,4,128,1,fp8,fp8,0,0.3035786747932434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,8,128,1,float16,float16,0,0.2349546750386556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,4,128,1,float16,fp8,0,0.2310826579729716
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,8,128,1,float16,fp8,0,0.23523199558258057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,32,8,128,1,fp8,fp8,0,0.30955733855565387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,1,128,1,float16,fp8,0,0.12014399965604146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,32,128,1,float16,fp8,0,0.12200533350308736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,32,128,1,float16,float16,0,0.12365333239237468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,32,128,1,fp8,fp8,0,0.16955200831095377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,2,128,1,float16,fp8,0,0.12075733145078023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,2,128,1,float16,float16,0,0.11992533008257548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,2,128,1,fp8,fp8,0,0.15982400377591452
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,1,128,1,fp8,fp8,0,0.15937599539756775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,1,128,1,float16,float16,0,0.12016533811887105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,4,128,1,float16,float16,0,0.12123200297355652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,4,128,1,fp8,fp8,0,0.16220800081888834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,4,128,1,float16,fp8,0,0.12133333086967468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,8,128,1,float16,float16,0,0.12269866466522217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,8,128,1,fp8,fp8,0,0.16482667128245035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,32,8,128,1,float16,fp8,0,0.12291733423868816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,32,128,1,float16,fp8,0,0.06599999964237213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,1,128,1,float16,float16,0,0.06761066615581512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,32,128,1,fp8,fp8,0,0.0932426651318868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,32,128,1,float16,float16,0,0.06658133367697398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,1,128,1,float16,fp8,0,0.0674239993095398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,2,128,1,float16,float16,0,0.06699199974536896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,1,128,1,fp8,fp8,0,0.08833600083986919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,2,128,1,fp8,fp8,0,0.08886399865150452
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,2,128,1,float16,fp8,0,0.06716266771157582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,4,128,1,float16,float16,0,0.06769066552321117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,4,128,1,float16,fp8,0,0.06754133105278015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,8,128,1,float16,fp8,0,0.06805866460005443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,8,128,1,float16,float16,0,0.06778666873772939
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,32,128,1,float16,float16,0,0.03955200066169103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,8,128,1,fp8,fp8,0,0.08980266253153484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,32,128,1,float16,fp8,0,0.03984000037113825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,32,4,128,1,fp8,fp8,0,0.08874133229255676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,32,128,1,fp8,fp8,0,0.052832002441088356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,1,128,1,float16,fp8,0,0.040805332362651825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,2,128,1,float16,float16,0,0.041375999649365745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,1,128,1,fp8,fp8,0,0.05177066723505656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,2,128,1,fp8,fp8,0,0.051957334081331887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,2,128,1,float16,fp8,0,0.041477332512537636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,1,128,1,float16,float16,0,0.04068800061941147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,4,128,1,fp8,fp8,0,0.052602668603261314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,4,128,1,float16,fp8,0,0.04161066561937332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,8,128,1,float16,float16,0,0.04178133110205332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,4,128,1,float16,float16,0,0.041477332512537636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,8,128,1,float16,fp8,0,0.04201599955558777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,32,8,128,1,fp8,fp8,0,0.05286933481693268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,32,128,1,float16,float16,0,0.027621333797772724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,32,128,1,fp8,fp8,0,0.03377600014209747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,1,128,1,float16,float16,0,0.027637332677841187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,1,128,1,float16,fp8,0,0.028517333169778187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,32,128,1,float16,fp8,0,0.02749866743882497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,2,128,1,float16,fp8,0,0.027930667002995808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,2,128,1,float16,float16,0,0.027930667002995808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,2,128,1,fp8,fp8,0,0.03346133232116699
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,1,128,1,fp8,fp8,0,0.03270400067170461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,4,128,1,float16,float16,0,0.02810666710138321
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,4,128,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,4,128,1,float16,fp8,0,0.02815466622511546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,8,128,1,float16,fp8,0,0.02811199923356374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,8,128,1,float16,float16,0,0.027744000156720478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,32,128,1,float16,float16,0,0.02060266708334287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,32,8,128,1,fp8,fp8,0,0.03449599941571554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,32,128,1,fp8,fp8,0,0.02442666639884313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,1,128,1,float16,float16,0,0.02029866725206375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,1,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,32,128,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,1,128,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,4,128,1,float16,float16,0,0.020629333953062694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,2,128,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,2,128,1,fp8,fp8,0,0.023738667368888855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,4,128,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,4,128,1,fp8,fp8,0,0.02418133368094762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,2,128,1,float16,float16,0,0.020432000358899433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,8,128,1,fp8,fp8,0,0.02422933280467987
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,8,128,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,32,128,1,float16,float16,0,0.017829333742459614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,32,128,1,fp8,fp8,0,0.01988799994190534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,32,128,1,float16,fp8,0,0.017957333475351334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,1,128,1,float16,float16,0,0.01714133347074191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,32,8,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,1,128,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,1,128,1,float16,fp8,0,0.018005333840847015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,2,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,2,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,2,128,1,float16,fp8,0,0.017994667092959087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,4,128,1,float16,float16,0,0.01752000053723653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,4,128,1,float16,fp8,0,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,8,128,1,float16,float16,0,0.017429333180189133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,4,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,8,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,32,128,1,float16,float16,0,0.01669866715868314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,32,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,1,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,32,128,1,fp8,fp8,0,0.01868266612291336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,1,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,32,8,128,1,float16,fp8,0,0.020794666061798733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,2,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,2,128,1,float16,float16,0,0.01669866715868314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,4,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,2,128,1,fp8,fp8,0,0.0183146670460701
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,4,128,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,4,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,1,128,1,fp8,fp8,0,0.018325333793958027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,8,128,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,8,128,1,fp8,fp8,0,0.018687999496857326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,32,128,1,float16,float16,0,0.0163680004576842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,32,8,128,1,float16,float16,0,0.016389333953460056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,32,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,32,128,1,fp8,fp8,0,0.017957333475351334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,1,128,1,fp8,fp8,0,0.0184906671444575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,1,128,1,float16,float16,0,0.016399999459584553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,2,128,1,float16,float16,0,0.01640533283352852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,2,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,4,128,1,fp8,fp8,0,0.018133333573738735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,1,128,1,float16,fp8,0,0.016634666671355564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,4,128,1,float16,float16,0,0.016741332908471424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,4,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,8,128,1,float16,float16,0,0.016447999825080235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,8,128,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,8,128,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,32,2,128,1,fp8,fp8,0,0.01793066660563151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,128,1,float16,float16,0,25.0522944132487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,128,1,float16,fp8,0,24.956644694010418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,128,1,fp8,fp8,0,16.486347198486328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,128,1,float16,float16,0,25.542154947916668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,128,1,fp8,fp8,0,16.53493881225586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,128,1,float16,fp8,0,25.232777913411457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,128,1,float16,float16,0,24.89232126871745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,128,1,float16,fp8,0,25.003829956054688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,128,1,float16,float16,0,12.606021881103516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,128,1,fp8,fp8,0,16.57321548461914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,128,1,fp8,fp8,0,8.404101053873697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,128,1,float16,float16,0,12.558639526367188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,128,1,fp8,fp8,0,16.993423461914062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,128,1,float16,fp8,0,25.0521977742513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,128,1,float16,fp8,0,12.699556986490885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,128,1,float16,float16,0,25.05682118733724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,128,1,float16,fp8,0,12.463642120361328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,128,1,fp8,fp8,0,8.474042892456055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,128,1,float16,float16,0,12.54213841756185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,128,1,fp8,fp8,0,8.614693323771158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,128,1,float16,fp8,0,12.52566401163737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,128,1,fp8,fp8,0,8.271856307983398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,128,1,float16,fp8,0,12.763882954915365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,128,1,float16,float16,0,12.634432474772135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,128,1,float16,float16,0,12.63272476196289
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,128,1,float16,fp8,0,12.631370544433594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,128,1,float16,fp8,0,6.088127772013347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,128,1,float16,float16,0,6.312496185302734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,128,1,fp8,fp8,0,4.111776034037272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,128,1,fp8,fp8,0,8.36188824971517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,128,1,float16,float16,0,6.455141067504883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,128,1,float16,float16,0,6.21504020690918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,128,1,float16,fp8,0,6.324581146240234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,128,1,fp8,fp8,0,4.212186813354492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,128,1,float16,fp8,0,6.270816167195638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,128,1,fp8,fp8,0,4.200912157694499
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,128,1,float16,float16,0,6.36900266011556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,128,1,fp8,fp8,0,4.218096097310384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,128,1,float16,fp8,0,6.221434911092122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,128,1,float16,float16,0,3.11573855082194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,128,1,float16,float16,0,6.3920103708903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,128,1,fp8,fp8,0,4.154608090718587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,128,1,float16,fp8,0,6.278058369954427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,128,1,float16,fp8,0,3.1294771830240884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,128,1,float16,float16,0,3.1791254679361978
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,128,1,float16,fp8,0,3.178330739339193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,128,1,fp8,fp8,0,2.1744747161865234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,128,1,float16,float16,0,3.1490348180135093
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,128,1,fp8,fp8,0,2.1952053705851235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,128,1,fp8,fp8,0,2.1807947158813477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,128,1,float16,fp8,0,3.136325200398763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,128,1,float16,float16,0,3.1518774032592773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,128,1,float16,fp8,0,3.24292786916097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,128,1,fp8,fp8,0,2.186720053354899
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,128,1,float16,fp8,0,3.137685457865397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,128,1,float16,float16,0,3.1433706283569336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,128,1,fp8,fp8,0,2.195312023162842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,128,1,fp8,fp8,0,9.644757588704428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,128,1,float16,fp8,0,14.196186065673828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,128,1,float16,float16,0,14.525306701660156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,128,1,float16,float16,0,14.792058308919271
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,128,1,fp8,fp8,0,9.660170873006185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,128,1,float16,fp8,0,14.645428975423178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,128,1,float16,fp8,0,14.455771128336588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,128,1,float16,float16,0,14.424901326497396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,128,1,fp8,fp8,0,9.726986567179361
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,128,1,float16,float16,0,7.034048080444336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,128,1,float16,float16,0,14.56722640991211
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,128,1,float16,float16,0,7.211568196614583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,128,1,float16,fp8,0,14.52907689412435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,128,1,float16,fp8,0,7.167237599690755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,128,1,fp8,fp8,0,10.05197842915853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,128,1,float16,fp8,0,7.1088104248046875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,128,1,fp8,fp8,0,4.826517422993978
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,128,1,fp8,fp8,0,4.921813329060872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,128,1,float16,float16,0,7.211829503377278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,128,1,float16,fp8,0,7.128704071044922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,128,1,fp8,fp8,0,4.734970728556315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,128,1,float16,float16,0,7.250848134358724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,128,1,float16,fp8,0,7.3239091237386065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,128,1,fp8,fp8,0,4.930944124857585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,128,1,float16,float16,0,7.256005605061849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,128,1,float16,float16,0,3.677728017171224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,128,1,float16,fp8,0,7.248229344685872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,128,1,fp8,fp8,0,4.775712013244629
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,128,1,float16,fp8,0,3.533503850301107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,128,1,float16,float16,0,3.511861483256022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,128,1,fp8,fp8,0,2.4543892542521157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,128,1,float16,fp8,0,3.485919952392578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,128,1,fp8,fp8,0,2.4885600407918296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,128,1,float16,float16,0,3.605322519938151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,128,1,fp8,fp8,0,2.462000052134196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,128,1,float16,fp8,0,3.620778719584147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,128,1,float16,float16,0,3.6229171752929688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,128,1,float16,fp8,0,3.5097812016805015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,128,1,fp8,fp8,0,2.4644373257954917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,128,1,float16,float16,0,3.5346399943033853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,128,1,float16,fp8,0,3.568058649698893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,128,1,float16,float16,0,1.8811999956766765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,128,1,float16,fp8,0,1.8592586517333984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,128,1,fp8,fp8,0,2.484842618306478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,128,1,float16,float16,0,1.878010590871175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,128,1,float16,fp8,0,1.8492693901062012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,128,1,fp8,fp8,0,1.3339734077453613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,128,1,fp8,fp8,0,1.35208527247111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,128,1,float16,float16,0,1.88154141108195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,128,1,float16,fp8,0,1.8660213152567546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,128,1,fp8,fp8,0,1.3337866465250652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,128,1,float16,float16,0,1.9031200408935547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,128,1,float16,fp8,0,1.876698652903239
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,128,1,fp8,fp8,0,1.3406933148701985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,128,1,float16,float16,0,1.8865706125895183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,128,1,fp8,fp8,0,1.3443199793497722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,128,1,float16,fp8,0,1.8890773455301921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,128,1,float16,float16,0,10.455024083455404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,128,1,fp8,fp8,0,6.981221516927083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,128,1,float16,fp8,0,10.189701080322266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,128,1,float16,float16,0,10.267498652140299
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,128,1,float16,fp8,0,10.375653584798178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,128,1,fp8,fp8,0,6.845546722412109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,128,1,float16,float16,0,10.225658416748047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,128,1,float16,fp8,0,10.389402389526367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,128,1,fp8,fp8,0,6.879088083902995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,128,1,float16,float16,0,5.139360109965007
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,128,1,float16,float16,0,10.622917175292969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,128,1,float16,fp8,0,4.856378555297852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,128,1,float16,fp8,0,10.274112065633139
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,128,1,fp8,fp8,0,6.960351943969727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,128,1,float16,float16,0,5.1538880666097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,128,1,float16,fp8,0,5.073322614034017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,128,1,fp8,fp8,0,3.5165440241495767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,128,1,fp8,fp8,0,3.416997273763021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,128,1,float16,float16,0,5.254485448201497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,128,1,float16,fp8,0,4.87940788269043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,128,1,fp8,fp8,0,3.419589360555013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,128,1,fp8,fp8,0,3.4367945988972983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,128,1,float16,float16,0,5.145296096801758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,128,1,float16,fp8,0,5.059621175130208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,128,1,float16,float16,0,5.020570755004883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,128,1,float16,fp8,0,5.151930809020996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,128,1,float16,float16,0,2.5496907234191895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,128,1,float16,fp8,0,2.507919947306315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,128,1,fp8,fp8,0,1.7916320164998372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,128,1,float16,float16,0,2.5229652722676597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,128,1,fp8,fp8,0,3.4690186182657876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,128,1,float16,fp8,0,2.5077813466389975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,128,1,fp8,fp8,0,1.835770606994629
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,128,1,float16,float16,0,2.5587679545084634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,128,1,float16,fp8,0,2.532368024190267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,128,1,fp8,fp8,0,1.7972159385681152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,128,1,float16,fp8,0,2.5120479265848794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,128,1,float16,float16,0,2.5344692866007485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,128,1,fp8,fp8,0,1.8043947219848633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,128,1,float16,float16,0,1.365536053975423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,128,1,float16,fp8,0,2.536853313446045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,128,1,float16,float16,0,1.3681492805480957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,128,1,float16,float16,0,2.539050738016764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,128,1,fp8,fp8,0,1.8170347213745117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,128,1,float16,fp8,0,1.3634400367736816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,128,1,fp8,fp8,0,0.9588692982991537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,128,1,fp8,fp8,0,0.9349866708119711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,128,1,float16,fp8,0,1.3491573333740234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,128,1,fp8,fp8,0,0.9393653074900309
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,128,1,float16,float16,0,1.3712372779846191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,128,1,float16,fp8,0,1.3608907063802083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,128,1,float16,float16,0,1.3833173116048176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,128,1,float16,fp8,0,1.3683360417683919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,128,1,fp8,fp8,0,0.9414026737213135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,128,1,float16,float16,0,1.3735520044962566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,128,1,float16,fp8,0,1.3612906138102214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,128,1,fp8,fp8,0,0.9475253423055013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,128,1,float16,fp8,0,13.382181803385416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,128,1,float16,float16,0,13.580949147542318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,128,1,fp8,fp8,0,9.495728174845377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,128,1,float16,float16,0,13.841983795166016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,128,1,float16,fp8,0,13.434944152832031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,128,1,fp8,fp8,0,9.30787722269694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,128,1,float16,float16,0,13.650634765625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,128,1,float16,fp8,0,13.48632558186849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,128,1,float16,float16,0,6.598458607991536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,128,1,fp8,fp8,0,9.350159962972006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,128,1,float16,float16,0,6.778330485026042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,128,1,float16,fp8,0,6.731029510498047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,128,1,float16,fp8,0,6.7293440500895185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,128,1,fp8,fp8,0,9.663061141967773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,128,1,float16,fp8,0,13.693930308024088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,128,1,fp8,fp8,0,4.762271881103516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,128,1,float16,float16,0,13.623093922932943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,128,1,fp8,fp8,0,4.606399854024251
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,128,1,float16,float16,0,6.653045018513997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,128,1,float16,fp8,0,6.553802490234375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,128,1,fp8,fp8,0,4.602261225382487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,128,1,float16,fp8,0,6.870581309000651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,128,1,float16,float16,0,6.838565190633138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,128,1,fp8,fp8,0,4.611477216084798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,128,1,float16,float16,0,6.57481575012207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,128,1,float16,fp8,0,6.526645024617513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,128,1,float16,float16,0,3.350192070007324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,128,1,float16,fp8,0,3.2100159327189126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,128,1,float16,float16,0,3.247866630554199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,128,1,float16,fp8,0,3.3336267471313477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,128,1,fp8,fp8,0,4.62718931833903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,128,1,fp8,fp8,0,2.3252906799316406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,128,1,float16,float16,0,3.2660481135050454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,128,1,float16,fp8,0,3.259183883666992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,128,1,fp8,fp8,0,2.3333226839701333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,128,1,fp8,fp8,0,2.411109288533529
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,128,1,float16,float16,0,3.3545494079589844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,128,1,float16,fp8,0,3.220794677734375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,128,1,fp8,fp8,0,2.340277353922526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,128,1,float16,float16,0,3.2732105255126953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,128,1,float16,fp8,0,3.264298756917318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,128,1,float16,float16,0,1.7009867032368977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,128,1,float16,fp8,0,1.6834239959716797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,128,1,fp8,fp8,0,2.3640213012695312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,128,1,float16,float16,0,1.7069493929545085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,128,1,float16,fp8,0,1.7184960047403972
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,128,1,fp8,fp8,0,1.2657653490702312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,128,1,fp8,fp8,0,1.2375733057657878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,128,1,float16,float16,0,1.7158719698588054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,128,1,float16,fp8,0,1.6856692632039387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,128,1,fp8,fp8,0,1.241919994354248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,128,1,float16,fp8,0,1.6908159255981445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,128,1,float16,float16,0,1.7269706726074219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,128,1,fp8,fp8,0,1.2431573073069255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,128,1,float16,float16,0,1.711429278055827
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,128,1,fp8,fp8,0,1.2517546812693279
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,128,1,float16,fp8,0,1.7022612889607747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,128,1,float16,float16,0,0.9429120222727457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,128,1,float16,float16,0,0.9305919806162516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,128,1,float16,fp8,0,0.9249493281046549
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,128,1,float16,fp8,0,0.9308693408966064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,128,1,fp8,fp8,0,0.650165319442749
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,128,1,float16,fp8,0,0.9298453330993652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,128,1,fp8,fp8,0,0.6697706381479899
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,128,1,float16,float16,0,0.9419306914011637
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,128,1,fp8,fp8,0,0.6529759963353475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,128,1,float16,float16,0,0.9391520023345947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,128,1,float16,fp8,0,0.9319679737091064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,128,1,fp8,fp8,0,0.6567999919255575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,128,1,float16,float16,0,0.9414506753285726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,128,1,float16,fp8,0,0.9327147006988525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,128,1,fp8,fp8,0,0.6618933280309042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,128,1,float16,fp8,0,7.986853281656901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,128,1,fp8,fp8,0,5.49677848815918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,128,1,float16,float16,0,8.032106399536133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,128,1,fp8,fp8,0,5.510997136433919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,128,1,float16,fp8,0,7.970826466878255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,128,1,float16,float16,0,7.951887766520183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,128,1,float16,float16,0,7.967568079630534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,128,1,float16,fp8,0,7.8291676839192705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,128,1,float16,float16,0,3.8484586079915366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,128,1,fp8,fp8,0,5.705434799194336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,128,1,fp8,fp8,0,5.64138666788737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,128,1,float16,float16,0,8.001295725504557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,128,1,float16,float16,0,4.028527895609538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,128,1,float16,fp8,0,7.859301249186198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,128,1,float16,fp8,0,3.829264005025228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,128,1,float16,fp8,0,3.7431891759236655
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,128,1,fp8,fp8,0,2.895690600077311
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,128,1,fp8,fp8,0,2.7785654067993164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,128,1,float16,float16,0,3.9108638763427734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,128,1,float16,fp8,0,3.845402717590332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,128,1,fp8,fp8,0,2.790287971496582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,128,1,float16,float16,0,3.8097171783447266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,128,1,float16,fp8,0,3.8404693603515625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,128,1,fp8,fp8,0,2.8016586303710938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,128,1,float16,fp8,0,3.9842348098754883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,128,1,float16,float16,0,3.8807201385498047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,128,1,float16,float16,0,1.9538720448811848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,128,1,float16,fp8,0,1.9283359845479329
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,128,1,float16,float16,0,1.9741333325703938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,128,1,float16,fp8,0,1.9904319445292156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,128,1,fp8,fp8,0,1.4416799545288086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,128,1,fp8,fp8,0,2.8253278732299805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,128,1,float16,float16,0,1.9662292798360188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,128,1,fp8,fp8,0,1.50980806350708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,128,1,float16,fp8,0,1.934266726175944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,128,1,fp8,fp8,0,1.4454879760742188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,128,1,float16,float16,0,1.9578612645467122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,128,1,fp8,fp8,0,1.4533012708028157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,128,1,float16,fp8,0,1.9571040471394856
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,128,1,float16,float16,0,1.9641599655151367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,128,1,float16,fp8,0,1.9609920183817546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,128,1,fp8,fp8,0,1.4693066279093425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,128,1,float16,float16,0,1.0492053031921387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,128,1,float16,float16,0,1.0480693181355794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,128,1,float16,fp8,0,1.0324587027231853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,128,1,fp8,fp8,0,0.7760480244954427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,128,1,float16,float16,0,1.0463253657023113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,128,1,float16,fp8,0,1.0287199815114338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,128,1,fp8,fp8,0,0.7800426483154297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,128,1,float16,fp8,0,1.0394240220387776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,128,1,fp8,fp8,0,0.8070826530456543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,128,1,float16,float16,0,1.054975986480713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,128,1,fp8,fp8,0,0.7810719807942709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,128,1,float16,fp8,0,1.0392639636993408
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,128,1,float16,float16,0,1.0541226863861084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,128,1,float16,fp8,0,1.0373493035634358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,128,1,float16,fp8,0,0.5804800192515055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,128,1,fp8,fp8,0,0.7909653186798096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,128,1,float16,float16,0,0.5879626671473185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,128,1,float16,float16,0,0.5862773259480795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,128,1,fp8,fp8,0,0.43299198150634766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,128,1,float16,fp8,0,0.5810079971949259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,128,1,fp8,fp8,0,0.41705600420633954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,128,1,float16,float16,0,0.5894240140914917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,128,1,float16,fp8,0,0.5805866718292236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,128,1,float16,fp8,0,0.5807146628697714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,128,1,fp8,fp8,0,0.4171146551767985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,128,1,fp8,fp8,0,0.42007466157277423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,128,1,float16,float16,0,0.591754674911499
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,128,1,float16,float16,0,0.5932266712188721
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,128,1,float16,fp8,0,0.5865279833475748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,128,1,fp8,fp8,0,0.42337600390116376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,128,1,float16,fp8,0,7.554442723592122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,128,1,float16,float16,0,7.647776285807292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,128,1,float16,float16,0,7.817365646362305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,128,1,fp8,fp8,0,5.578725179036458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,128,1,fp8,fp8,0,5.657829284667969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,128,1,float16,fp8,0,7.6336212158203125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,128,1,float16,fp8,0,7.616325378417969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,128,1,float16,float16,0,7.769392013549805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,128,1,float16,float16,0,3.754096031188965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,128,1,fp8,fp8,0,5.6289011637369795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,128,1,float16,fp8,0,3.6530933380126953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,128,1,float16,fp8,0,3.8907734553019204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,128,1,float16,fp8,0,7.851877212524414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,128,1,fp8,fp8,0,5.708431879679362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,128,1,float16,float16,0,7.849536259969075
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,128,1,float16,float16,0,3.85588804880778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,128,1,fp8,fp8,0,2.816783905029297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,128,1,fp8,fp8,0,2.9985758463541665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,128,1,float16,float16,0,3.77021853129069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,128,1,fp8,fp8,0,2.8348585764567056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,128,1,float16,fp8,0,3.729599952697754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,128,1,float16,fp8,0,3.7584425608317056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,128,1,float16,float16,0,3.727130572001139
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,128,1,fp8,fp8,0,2.859648068745931
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,128,1,float16,float16,0,3.736581484476725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,128,1,float16,fp8,0,3.733674685160319
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,128,1,float16,float16,0,1.880949338277181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,128,1,float16,fp8,0,1.864005406697591
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,128,1,fp8,fp8,0,1.4381066958109539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,128,1,float16,float16,0,1.956607977549235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,128,1,fp8,fp8,0,2.8921868006388345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,128,1,float16,fp8,0,1.9150293668111165
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,128,1,float16,float16,0,1.8836426734924316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,128,1,float16,fp8,0,1.8560266494750977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,128,1,fp8,fp8,0,1.5367093086242676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,128,1,fp8,fp8,0,1.447205384572347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,128,1,float16,float16,0,1.8907146453857422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,128,1,fp8,fp8,0,1.4568692843119304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,128,1,float16,fp8,0,1.8808959325154622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,128,1,float16,float16,0,1.9029919306437175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,128,1,float16,fp8,0,1.8788053194681804
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,128,1,float16,fp8,0,0.9752853711446127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,128,1,float16,float16,0,0.9884160359700521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,128,1,fp8,fp8,0,1.4756959279378254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,128,1,float16,float16,0,1.003754695256551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,128,1,float16,fp8,0,1.0093066692352295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,128,1,fp8,fp8,0,0.762880007425944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,128,1,fp8,fp8,0,0.8135626316070557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,128,1,float16,float16,0,0.9883733590443929
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,128,1,fp8,fp8,0,0.765343983968099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,128,1,float16,fp8,0,0.9787946542104086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,128,1,float16,float16,0,0.9931893348693848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,128,1,float16,fp8,0,0.9764213562011719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,128,1,fp8,fp8,0,0.7718773682912191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,128,1,float16,fp8,0,0.9820693333943685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,128,1,float16,float16,0,0.5412319898605347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,128,1,float16,float16,0,1.0035520394643147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,128,1,float16,fp8,0,0.5464106798171997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,128,1,float16,float16,0,0.5487146774927775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,128,1,fp8,fp8,0,0.7820266882578532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,128,1,fp8,fp8,0,0.4289546807607015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,128,1,fp8,fp8,0,0.3988106648127238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,128,1,float16,float16,0,0.5425599813461304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,128,1,fp8,fp8,0,0.4020479917526245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,128,1,float16,fp8,0,0.532693346341451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,128,1,float16,fp8,0,0.5341013272603353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,128,1,float16,float16,0,0.5455360015233358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,128,1,float16,fp8,0,0.534986654917399
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,128,1,fp8,fp8,0,0.4043360153834025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,128,1,float16,float16,0,0.5454506476720175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,128,1,float16,float16,0,0.2834933400154114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,128,1,float16,fp8,0,0.5390719970067342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,128,1,fp8,fp8,0,0.24368000030517578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,128,1,float16,fp8,0,0.27904532353083294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,128,1,fp8,fp8,0,0.41145066420237225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,128,1,float16,fp8,0,0.28707732756932575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,128,1,fp8,fp8,0,0.23014400402704874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,128,1,float16,float16,0,0.2930613358815511
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,128,1,float16,float16,0,0.28412266572316486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,128,1,float16,fp8,0,0.2781599958737691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,128,1,fp8,fp8,0,0.23348265886306763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,128,1,float16,float16,0,0.2850400010744731
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,128,1,float16,fp8,0,0.2812053362528483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,128,1,fp8,fp8,0,0.23439466953277588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,128,1,float16,float16,0,0.2863679925600688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,128,1,fp8,fp8,0,0.23758399486541748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,128,1,float16,fp8,0,0.28177066644032794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,128,1,float16,float16,0,4.593023935953776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,128,1,fp8,fp8,0,3.572271982828776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,128,1,float16,fp8,0,4.575119972229004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,128,1,float16,float16,0,4.65499210357666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,128,1,fp8,fp8,0,3.578314781188965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,128,1,float16,fp8,0,4.446554819742839
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,128,1,float16,float16,0,4.566357294718425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,128,1,float16,fp8,0,4.514325459798177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,128,1,fp8,fp8,0,3.605877240498861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,128,1,float16,float16,0,2.2946507136027017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,128,1,float16,fp8,0,4.542991956075032
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,128,1,float16,float16,0,4.64082145690918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,128,1,float16,float16,0,2.3534933725992837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,128,1,float16,fp8,0,2.3423253695170083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,128,1,fp8,fp8,0,3.6691681543986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,128,1,fp8,fp8,0,1.9482827186584473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,128,1,float16,fp8,0,2.2330452601114907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,128,1,fp8,fp8,0,1.8022185961405437
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,128,1,float16,float16,0,2.2915627161661782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,128,1,fp8,fp8,0,1.808901309967041
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,128,1,float16,fp8,0,2.2415520350138345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,128,1,float16,float16,0,2.292853355407715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,128,1,float16,fp8,0,2.2837279637654624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,128,1,fp8,fp8,0,1.8276480038960774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,128,1,float16,float16,0,1.178165356318156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,128,1,float16,float16,0,2.3141759236653647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,128,1,float16,fp8,0,1.1514506340026855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,128,1,float16,float16,0,1.207306702931722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,128,1,float16,fp8,0,1.20470396677653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,128,1,fp8,fp8,0,1.8561174074808757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,128,1,float16,float16,0,1.1754506429036458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,128,1,fp8,fp8,0,1.0130133628845215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,128,1,float16,fp8,0,2.304080009460449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,128,1,fp8,fp8,0,0.9324959913889567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,128,1,fp8,fp8,0,0.9328426520029703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,128,1,float16,fp8,0,1.156282663345337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,128,1,float16,float16,0,1.182261308034261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,128,1,float16,fp8,0,1.1631413300832112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,128,1,fp8,fp8,0,0.94158935546875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,128,1,float16,float16,0,1.1884640057881672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,128,1,fp8,fp8,0,0.9583253065745035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,128,1,float16,fp8,0,1.179200013478597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,128,1,float16,float16,0,0.6257493495941162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,128,1,float16,fp8,0,0.6117440064748129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,128,1,fp8,fp8,0,0.5013920068740845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,128,1,float16,float16,0,0.6383573214213053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,128,1,fp8,fp8,0,0.5415253241856893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,128,1,float16,float16,0,0.6288959980010986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,128,1,float16,fp8,0,0.6361120144526163
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,128,1,fp8,fp8,0,0.5022879838943481
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,128,1,float16,fp8,0,0.6171786785125732
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,128,1,float16,float16,0,0.6287146806716919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,128,1,float16,fp8,0,0.6185546716054281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,128,1,fp8,fp8,0,0.5056480169296265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,128,1,float16,float16,0,0.6334986686706543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,128,1,float16,fp8,0,0.6256639957427979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,128,1,float16,float16,0,0.3492586612701416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,128,1,float16,float16,0,0.3581013282140096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,128,1,float16,fp8,0,0.34175999959309894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,128,1,fp8,fp8,0,0.2878933350245158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,128,1,float16,fp8,0,0.3558613459269206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,128,1,fp8,fp8,0,0.26658133665720624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,128,1,fp8,fp8,0,0.5159466663996378
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,128,1,float16,float16,0,0.3521706660588582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,128,1,float16,fp8,0,0.344709316889445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,128,1,fp8,fp8,0,0.2677866617838542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,128,1,float16,fp8,0,0.3463306824366252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,128,1,fp8,fp8,0,0.270416001478831
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,128,1,float16,float16,0,0.35502398014068604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,128,1,float16,float16,0,0.3519039948781331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,128,1,float16,fp8,0,0.34911465644836426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,128,1,fp8,fp8,0,0.2739786704381307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,128,1,float16,float16,0,0.19529600938161215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,128,1,float16,fp8,0,0.18407466014226279
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,128,1,float16,fp8,0,0.19385600090026855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,128,1,fp8,fp8,0,0.16809600591659546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,128,1,fp8,fp8,0,0.15618133544921875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,128,1,float16,float16,0,0.1877866586049398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,128,1,float16,fp8,0,0.18530132373174033
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,128,1,float16,float16,0,0.1877546707789103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,128,1,fp8,fp8,0,0.15726932883262634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,128,1,float16,float16,0,0.18872533241907755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,128,1,float16,fp8,0,0.18589866161346436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,128,1,fp8,fp8,0,0.15959466497103372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,128,1,float16,float16,0,0.18978667259216309
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,128,1,float16,fp8,0,0.18703999121983847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,128,1,fp8,fp8,0,0.16247466206550598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,128,1,float16,fp8,0,4.619615872701009
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,128,1,fp8,fp8,0,3.921834627787272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,128,1,float16,float16,0,4.791589419047038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,128,1,float16,fp8,0,4.602533340454102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,128,1,float16,float16,0,4.777471860249837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,128,1,fp8,fp8,0,3.935946782430013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,128,1,float16,float16,0,4.866655985514323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,128,1,float16,fp8,0,4.682442665100098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,128,1,float16,float16,0,2.3874719937642417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,128,1,fp8,fp8,0,3.988981246948242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,128,1,float16,fp8,0,2.3183627128601074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,128,1,float16,float16,0,4.931669235229492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,128,1,float16,fp8,0,2.455669403076172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,128,1,fp8,fp8,0,4.037973403930664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,128,1,float16,fp8,0,4.790805180867513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,128,1,float16,float16,0,2.5161760648091636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,128,1,fp8,fp8,0,2.161424001057943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,128,1,fp8,fp8,0,1.9706506729125977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,128,1,float16,float16,0,2.393535931905111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,128,1,float16,fp8,0,2.3177226384480796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,128,1,fp8,fp8,0,1.9766826629638672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,128,1,float16,float16,0,2.3995200792948403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,128,1,float16,fp8,0,2.3350399335225425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,128,1,fp8,fp8,0,2.002138614654541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,128,1,float16,float16,0,1.2063626448313396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,128,1,float16,fp8,0,2.35805336634318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,128,1,float16,float16,0,2.4366559982299805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,128,1,float16,fp8,0,1.1736213366190593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,128,1,fp8,fp8,0,2.037722587585449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,128,1,float16,float16,0,1.2664639949798584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,128,1,fp8,fp8,0,0.9986826578776041
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,128,1,float16,fp8,0,1.2441973686218262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,128,1,fp8,fp8,0,1.105178674062093
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,128,1,float16,float16,0,1.2037546634674072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,128,1,float16,fp8,0,1.1804426511128743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,128,1,fp8,fp8,0,1.0053866704305012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,128,1,float16,float16,0,1.2153120040893555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,128,1,float16,fp8,0,1.189296007156372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,128,1,fp8,fp8,0,1.0149013201395671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,128,1,float16,float16,0,0.6313120126724243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,128,1,float16,fp8,0,1.2010773022969563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,128,1,float16,float16,0,1.222373326619466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,128,1,fp8,fp8,0,1.034117301305135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,128,1,float16,fp8,0,0.6158399979273478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,128,1,float16,float16,0,0.6614880164464315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,128,1,fp8,fp8,0,0.5299466848373413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,128,1,float16,fp8,0,0.6508533159891764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,128,1,fp8,fp8,0,0.5807573397954305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,128,1,float16,float16,0,0.6312853495279948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,128,1,float16,fp8,0,0.6160906553268433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,128,1,fp8,fp8,0,0.5279093186060587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,128,1,float16,float16,0,0.6360693375269572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,128,1,float16,fp8,0,0.6238240003585815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,128,1,fp8,fp8,0,0.5342773199081421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,128,1,float16,float16,0,0.35602664947509766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,128,1,float16,fp8,0,0.6298186779022217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,128,1,fp8,fp8,0,0.5429066816965739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,128,1,float16,fp8,0,0.352234681447347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,128,1,float16,float16,0,0.6418773333231608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,128,1,float16,float16,0,0.3435200055440267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,128,1,float16,fp8,0,0.3341546853383382
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,128,1,fp8,fp8,0,0.27642667293548584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,128,1,fp8,fp8,0,0.31329600016276044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,128,1,float16,float16,0,0.3451146682103475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,128,1,float16,fp8,0,0.3358293374379476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,128,1,fp8,fp8,0,0.27853866418202716
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,128,1,float16,float16,0,0.346346656481425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,128,1,fp8,fp8,0,0.28092267115910846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,128,1,float16,fp8,0,0.33956265449523926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,128,1,float16,float16,0,0.3487093448638916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,128,1,float16,fp8,0,0.3425813515981038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,128,1,fp8,fp8,0,0.2874026695887248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,128,1,float16,float16,0,0.19361066818237305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,128,1,float16,fp8,0,0.1916053295135498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,128,1,fp8,fp8,0,0.17359467347462973
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,128,1,float16,float16,0,0.18216532468795776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,128,1,float16,fp8,0,0.17850667238235474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,128,1,fp8,fp8,0,0.15828800201416016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,128,1,float16,fp8,0,0.1787733236948649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,128,1,float16,float16,0,0.1822133262952169
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,128,1,fp8,fp8,0,0.15853866934776306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,128,1,float16,float16,0,0.18376533190409342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,128,1,float16,fp8,0,0.1792746583620707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,128,1,fp8,fp8,0,0.1612106661001841
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,128,1,float16,fp8,0,0.18078400691350302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,128,1,fp8,fp8,0,0.1641813317934672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,128,1,float16,float16,0,0.11461866895357768
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,128,1,float16,fp8,0,0.11321600278218587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,128,1,fp8,fp8,0,0.10593066612879436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,128,1,float16,float16,0,0.18649067481358847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,128,1,float16,float16,0,0.10979732871055603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,128,1,float16,fp8,0,0.10844266414642334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,128,1,fp8,fp8,0,0.09496000409126282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,128,1,float16,float16,0,0.10987200339635213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,128,1,fp8,fp8,0,0.09511466821034749
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,128,1,float16,fp8,0,0.10828266541163127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,128,1,float16,float16,0,0.11031466722488403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,128,1,fp8,fp8,0,0.09687999884287517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,128,1,float16,float16,0,0.11178666353225708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,128,1,float16,fp8,0,0.10868799686431885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,128,1,float16,fp8,0,0.10943466424942017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,128,1,fp8,fp8,0,0.10056533416112264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,128,1,float16,float16,0,3.06932799021403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,128,1,float16,fp8,0,2.954037348429362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,128,1,fp8,fp8,0,2.6175626118977866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,128,1,float16,fp8,0,2.9673439661661782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,128,1,fp8,fp8,0,2.6380319595336914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,128,1,float16,float16,0,3.0850772857666016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,128,1,float16,fp8,0,3.007450739542643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,128,1,float16,float16,0,3.1225760777791343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,128,1,float16,float16,0,1.535520076751709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,128,1,fp8,fp8,0,2.679861386617025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,128,1,float16,float16,0,3.1536054611206055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,128,1,fp8,fp8,0,2.7298558553059897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,128,1,float16,fp8,0,1.48691193262736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,128,1,float16,fp8,0,1.6055466334025066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,128,1,float16,float16,0,1.6546667416890461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,128,1,fp8,fp8,0,1.4807626406351726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,128,1,float16,fp8,0,3.0421972274780273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,128,1,fp8,fp8,0,1.3186986446380615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,128,1,float16,float16,0,1.541445255279541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,128,1,float16,fp8,0,1.4986880620320637
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,128,1,fp8,fp8,0,1.3270400365193684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,128,1,fp8,fp8,0,1.3491840362548828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,128,1,float16,float16,0,1.5543360710144043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,128,1,float16,fp8,0,1.5105600357055664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,128,1,float16,float16,0,1.5726826985677083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,128,1,float16,fp8,0,1.5354666709899902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,128,1,float16,fp8,0,0.7649760246276855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,128,1,float16,float16,0,0.7861173152923584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,128,1,float16,float16,0,0.7882346312204996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,128,1,float16,float16,0,0.8357280095418295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,128,1,float16,fp8,0,0.8224159876505533
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,128,1,fp8,fp8,0,0.6779999732971191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,128,1,fp8,fp8,0,1.3746933937072754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,128,1,fp8,fp8,0,0.7626240253448486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,128,1,float16,fp8,0,0.7698506514231364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,128,1,fp8,fp8,0,0.684613307317098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,128,1,float16,float16,0,0.7935520013173422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,128,1,float16,fp8,0,0.777621348698934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,128,1,fp8,fp8,0,0.6897813479105631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,128,1,float16,float16,0,0.80293869972229
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,128,1,float16,fp8,0,0.7911840279897054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,128,1,fp8,fp8,0,0.707914670308431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,128,1,float16,float16,0,0.4187146822611491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,128,1,float16,float16,0,0.44236799081166583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,128,1,float16,fp8,0,0.4044586817423503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,128,1,fp8,fp8,0,0.36239465077718097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,128,1,float16,float16,0,0.4190986553827922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,128,1,float16,fp8,0,0.4367733399073283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,128,1,fp8,fp8,0,0.4030826489130656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,128,1,float16,fp8,0,0.4092053174972534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,128,1,fp8,fp8,0,0.36497068405151367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,128,1,float16,fp8,0,0.41069865226745605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,128,1,float16,float16,0,0.4222133159637451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,128,1,fp8,fp8,0,0.3692479928334554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,128,1,float16,float16,0,0.4264106750488281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,128,1,fp8,fp8,0,0.37485865751902264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,128,1,float16,float16,0,0.24344533681869507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,128,1,float16,fp8,0,0.41838932037353516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,128,1,float16,fp8,0,0.24152533213297525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,128,1,float16,fp8,0,0.224671999613444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,128,1,float16,float16,0,0.23082667589187622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,128,1,fp8,fp8,0,0.21619733174641928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,128,1,fp8,fp8,0,0.1941279967625936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,128,1,float16,float16,0,0.23082667589187622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,128,1,float16,fp8,0,0.22684266169865927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,128,1,fp8,fp8,0,0.19524266322453818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,128,1,float16,float16,0,0.23455999294916788
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,128,1,float16,fp8,0,0.22804800669352213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,128,1,fp8,fp8,0,0.20094933112462363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,128,1,fp8,fp8,0,0.19785600900650024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,128,1,float16,float16,0,0.23545066515604654
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,128,1,float16,fp8,0,0.23198399941126505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,128,1,float16,fp8,0,0.13343466321627298
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,128,1,float16,float16,0,0.13513599832852682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,128,1,fp8,fp8,0,0.12650133172671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,128,1,float16,float16,0,0.12455999851226807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,128,1,float16,fp8,0,0.12307733297348022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,128,1,float16,float16,0,0.12583466370900473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,128,1,fp8,fp8,0,0.11241066455841064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,128,1,float16,fp8,0,0.12304533521334331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,128,1,fp8,fp8,0,0.11401066184043884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,128,1,float16,float16,0,0.12761066357294717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,128,1,float16,fp8,0,0.12475200494130452
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,128,1,fp8,fp8,0,0.11637866497039795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,128,1,float16,float16,0,0.1283626655737559
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,128,1,float16,fp8,0,0.12594667077064514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,128,1,float16,float16,0,0.08319999774297078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,128,1,fp8,fp8,0,0.07941333452860515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,128,1,float16,float16,0,0.07939733564853668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,128,1,fp8,fp8,0,0.11845333377520244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,128,1,float16,fp8,0,0.08186133205890656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,128,1,fp8,fp8,0,0.0710506687561671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,128,1,float16,fp8,0,0.07761066655317943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,128,1,float16,float16,0,0.07932266592979431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,128,1,float16,fp8,0,0.07860800127188365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,128,1,fp8,fp8,0,0.07115200161933899
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,128,1,float16,float16,0,0.07990399996439616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,128,1,fp8,fp8,0,0.07147733370463054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,128,1,float16,fp8,0,0.07858133316040039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,128,1,float16,float16,0,0.07971733311812083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,128,1,fp8,fp8,0,0.07262933254241943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,128,1,float16,fp8,0,0.07866133252779643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,128,1,float16,float16,0,2.9743947982788086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,128,1,float16,fp8,0,2.9109811782836914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,128,1,fp8,fp8,0,2.62116273244222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,128,1,float16,fp8,0,3.001317342122396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,128,1,float16,float16,0,3.014927864074707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,128,1,fp8,fp8,0,2.7220214207967124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,128,1,float16,fp8,0,3.1568800608317056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,128,1,float16,float16,0,3.1181119283040366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,128,1,float16,float16,0,1.4693013827006023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,128,1,fp8,fp8,0,2.952159881591797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,128,1,float16,float16,0,3.1602293650309243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,128,1,float16,fp8,0,1.4647626876831055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,128,1,float16,fp8,0,3.181407928466797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,128,1,float16,fp8,0,1.6754719416300456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,128,1,float16,float16,0,1.7090026537577312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,128,1,fp8,fp8,0,1.510298728942871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,128,1,fp8,fp8,0,2.9872798919677734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,128,1,fp8,fp8,0,1.3142720063527424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,128,1,float16,float16,0,1.4902879397074382
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,128,1,float16,fp8,0,1.4965866406758626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,128,1,fp8,fp8,0,1.366341272989909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,128,1,float16,float16,0,1.5653707186381023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,128,1,float16,fp8,0,1.5723412831624348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,128,1,fp8,fp8,0,1.486778736114502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,128,1,float16,fp8,0,1.6008693377176921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,128,1,float16,float16,0,1.5820959409077961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,128,1,float16,float16,0,0.7514080206553141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,128,1,float16,float16,0,0.8656266530354818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,128,1,float16,fp8,0,0.8442186514536539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,128,1,fp8,fp8,0,1.4998985926310222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,128,1,float16,fp8,0,0.7478559811909994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,128,1,fp8,fp8,0,0.6636106570561727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,128,1,fp8,fp8,0,0.7631733417510986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,128,1,float16,float16,0,0.7586346467336019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,128,1,float16,fp8,0,0.7546453475952148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,128,1,fp8,fp8,0,0.6981066862742106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,128,1,float16,float16,0,0.7864320278167725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,128,1,float16,fp8,0,0.7828853130340576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,128,1,fp8,fp8,0,0.7565653324127197
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,128,1,float16,float16,0,0.794490655263265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,128,1,float16,float16,0,0.4400906562805176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,128,1,float16,fp8,0,0.8030347029368082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,128,1,float16,float16,0,0.3932853142420451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,128,1,float16,fp8,0,0.3906826575597127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,128,1,float16,fp8,0,0.4317866563796997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,128,1,fp8,fp8,0,0.3434400161107381
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,128,1,fp8,fp8,0,0.7607200145721436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,128,1,fp8,fp8,0,0.3954879840215047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,128,1,float16,fp8,0,0.3933279911677043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,128,1,float16,float16,0,0.3940053383509318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,128,1,fp8,fp8,0,0.36003732681274414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,128,1,float16,float16,0,0.40597331523895264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,128,1,float16,fp8,0,0.4015413522720337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,128,1,fp8,fp8,0,0.39180266857147217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,128,1,float16,fp8,0,0.40861864884694415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,128,1,float16,float16,0,0.4107573429743449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,128,1,fp8,fp8,0,0.39022401968638104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,128,1,float16,float16,0,0.21024533112843832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,128,1,float16,fp8,0,0.2320853273073832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,128,1,float16,float16,0,0.23474133014678955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,128,1,float16,fp8,0,0.20966400702794394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,128,1,fp8,fp8,0,0.2039146622021993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,128,1,fp8,fp8,0,0.17691733439763388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,128,1,fp8,fp8,0,0.18342934052149454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,128,1,float16,fp8,0,0.21017066637674967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,128,1,float16,float16,0,0.2107306718826294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,128,1,float16,float16,0,0.21660266319910684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,128,1,float16,fp8,0,0.21546133359273276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,128,1,fp8,fp8,0,0.19613866011301676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,128,1,float16,float16,0,0.22043200333913168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,128,1,float16,fp8,0,0.21825067202250162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,128,1,float16,float16,0,0.11177600423494975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,128,1,float16,float16,0,0.12969066699345908
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,128,1,fp8,fp8,0,0.19785600900650024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,128,1,float16,fp8,0,0.11236799756685893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,128,1,fp8,fp8,0,0.11079466342926025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,128,1,float16,fp8,0,0.12824533383051553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,128,1,fp8,fp8,0,0.09737599889437358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,128,1,float16,float16,0,0.11221333344777425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,128,1,float16,fp8,0,0.11201066772143047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,128,1,fp8,fp8,0,0.09920533498128255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,128,1,float16,float16,0,0.11487999558448792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,128,1,float16,fp8,0,0.11517866452534993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,128,1,fp8,fp8,0,0.1065066655476888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,128,1,float16,fp8,0,0.11657599608103435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,128,1,fp8,fp8,0,0.10762666662534077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,128,1,float16,float16,0,0.11782933274904887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,128,1,float16,float16,0,0.06181866427262624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,128,1,float16,float16,0,0.07060266534487407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,128,1,float16,fp8,0,0.06249066690603892
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,128,1,float16,fp8,0,0.06901333232720692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,128,1,fp8,fp8,0,0.06566933294137318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,128,1,fp8,fp8,0,0.056645333766937256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,128,1,float16,float16,0,0.06261333326498668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,128,1,fp8,fp8,0,0.057434668143590294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,128,1,float16,float16,0,0.0644053320089976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,128,1,float16,fp8,0,0.06428800026575725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,128,1,fp8,fp8,0,0.06091733276844025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,128,1,float16,fp8,0,0.06294399996598561
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,128,1,float16,float16,0,0.06514666477839152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,128,1,float16,fp8,0,0.06446399788061778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,128,1,fp8,fp8,0,0.06299200157324474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,128,1,float16,fp8,0,0.04121600091457367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,128,1,float16,float16,0,0.0414986660083135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,128,1,fp8,fp8,0,0.03989866624275843
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,128,1,float16,float16,0,0.03805333375930786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,128,1,fp8,fp8,0,0.03702933341264725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,128,1,float16,fp8,0,0.0386613334218661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,128,1,float16,float16,0,0.03828266759713491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,128,1,float16,fp8,0,0.03887466589609782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,128,1,fp8,fp8,0,0.03722666700681051
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,128,1,float16,float16,0,0.0393653338154157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,128,1,float16,fp8,0,0.03933866570393244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,128,1,float16,float16,0,0.038831998904546104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,128,1,fp8,fp8,0,0.03888533264398575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,128,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,128,1,float16,fp8,0,0.03944533318281174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,1,128,1,float16,float16,0,2.3158666292826333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,1,128,1,fp8,fp8,0,2.1768213907877603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,1,128,1,float16,fp8,0,2.311024030049642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,2,128,1,float16,float16,0,2.390341281890869
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,2,128,1,float16,fp8,0,2.380239963531494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,2,128,1,fp8,fp8,0,2.2564214070638022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,4,128,1,float16,fp8,0,2.494917392730713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,4,128,1,float16,float16,0,2.4818879763285318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,1,128,1,float16,float16,0,1.1459360122680664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,4,128,1,fp8,fp8,0,2.4964319864908853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,24,128,1,float16,float16,0,1.383845329284668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,24,128,1,float16,fp8,0,1.3595253626505535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,1,128,1,float16,fp8,0,1.1415733496348064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,8,128,1,fp8,fp8,0,2.5186452865600586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,8,128,1,float16,float16,0,2.5125600496927896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,24,8,128,1,float16,fp8,0,2.528810660044352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,1,128,1,fp8,fp8,0,1.0804479916890461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,24,128,1,fp8,fp8,0,1.2953813076019287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,2,128,1,float16,float16,0,1.1809066931406658
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,2,128,1,float16,fp8,0,1.162021319071452
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,4,128,1,float16,float16,0,1.250970681508382
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,4,128,1,float16,fp8,0,1.2362720171610515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,2,128,1,fp8,fp8,0,1.127674659093221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,4,128,1,fp8,fp8,0,1.2516427040100098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,8,128,1,float16,float16,0,1.2573920090993245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,1,128,1,float16,float16,0,0.5847253402074178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,8,128,1,float16,fp8,0,1.2685759862263997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,1,128,1,float16,fp8,0,0.5845439831415812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,24,128,1,float16,float16,0,0.6934719880421957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,24,8,128,1,fp8,fp8,0,1.2627413272857666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,1,128,1,fp8,fp8,0,0.5448213418324789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,24,128,1,float16,fp8,0,0.6777599652608236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,2,128,1,float16,float16,0,0.5909386475880941
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,24,128,1,fp8,fp8,0,0.6469546556472778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,2,128,1,float16,fp8,0,0.5914773146311442
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,2,128,1,fp8,fp8,0,0.5735253492991129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,4,128,1,float16,float16,0,0.626693328221639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,4,128,1,float16,fp8,0,0.6150399843851725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,4,128,1,fp8,fp8,0,0.6367146571477255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,1,128,1,float16,float16,0,0.30587200323740643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,1,128,1,float16,fp8,0,0.3046879967053731
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,8,128,1,float16,fp8,0,0.6341813405354818
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,8,128,1,float16,float16,0,0.6348533233006796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,24,8,128,1,fp8,fp8,0,0.6410133441289266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,24,128,1,float16,fp8,0,0.3503306706746419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,24,128,1,fp8,fp8,0,0.33371198177337646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,24,128,1,float16,float16,0,0.35950934886932373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,1,128,1,fp8,fp8,0,0.2837653358777364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,2,128,1,float16,float16,0,0.31035733222961426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,2,128,1,fp8,fp8,0,0.29972267150878906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,2,128,1,float16,fp8,0,0.30799466371536255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,4,128,1,float16,float16,0,0.3216746648152669
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,4,128,1,float16,fp8,0,0.31779734293619794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,4,128,1,fp8,fp8,0,0.33010133107503253
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,8,128,1,float16,fp8,0,0.32122133175532025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,24,128,1,float16,float16,0,0.193066676457723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,8,128,1,fp8,fp8,0,0.3306879997253418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,24,128,1,fp8,fp8,0,0.1754186749458313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,24,8,128,1,float16,float16,0,0.3245919942855835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,24,128,1,float16,fp8,0,0.18659200270970663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,1,128,1,float16,fp8,0,0.16248533129692078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,1,128,1,float16,float16,0,0.16316800316174826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,1,128,1,fp8,fp8,0,0.1513759990533193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,2,128,1,float16,fp8,0,0.1637173295021057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,2,128,1,float16,float16,0,0.16548267006874084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,2,128,1,fp8,fp8,0,0.15174399813016257
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,4,128,1,float16,fp8,0,0.16866666078567505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,4,128,1,float16,float16,0,0.17056000232696533
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,4,128,1,fp8,fp8,0,0.16894400119781494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,8,128,1,float16,float16,0,0.17326400677363077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,8,128,1,float16,fp8,0,0.17142399152119955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,24,8,128,1,fp8,fp8,0,0.17144532998402914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,24,128,1,float16,float16,0,0.11179733276367188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,24,128,1,float16,fp8,0,0.10883200168609619
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,1,128,1,float16,fp8,0,0.09010133147239685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,1,128,1,float16,float16,0,0.0906880001227061
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,24,128,1,fp8,fp8,0,0.09584533174832661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,1,128,1,fp8,fp8,0,0.08399466673533122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,2,128,1,float16,fp8,0,0.09217600027720134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,2,128,1,float16,float16,0,0.09083200494448344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,2,128,1,fp8,fp8,0,0.08629332979520161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,4,128,1,float16,float16,0,0.09416533509890239
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,4,128,1,float16,fp8,0,0.09321600198745728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,8,128,1,float16,float16,0,0.09595200419425964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,4,128,1,fp8,fp8,0,0.09397866328557332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,8,128,1,float16,fp8,0,0.09502933422724406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,24,128,1,float16,float16,0,0.06071466704209646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,24,8,128,1,fp8,fp8,0,0.09406933188438416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,24,128,1,float16,fp8,0,0.059487998485565186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,1,128,1,float16,float16,0,0.051781331499417625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,24,128,1,fp8,fp8,0,0.05779199798901876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,1,128,1,float16,fp8,0,0.05179733534653982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,2,128,1,float16,float16,0,0.05227200190226237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,1,128,1,fp8,fp8,0,0.04929066697756449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,2,128,1,float16,fp8,0,0.05230399966239929
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,2,128,1,fp8,fp8,0,0.04911999901135763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,4,128,1,float16,fp8,0,0.053216000398000084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,4,128,1,float16,float16,0,0.053871999184290566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,4,128,1,fp8,fp8,0,0.05417066812515259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,8,128,1,float16,float16,0,0.05504000186920166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,8,128,1,float16,fp8,0,0.05479466418425242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,24,128,1,float16,float16,0,0.03660800059636434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,24,8,128,1,fp8,fp8,0,0.05443733433882395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,24,128,1,float16,fp8,0,0.037087999284267426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,24,128,1,fp8,fp8,0,0.03589866558710734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,1,128,1,float16,float16,0,0.03387733300526937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,2,128,1,float16,float16,0,0.03386666625738144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,1,128,1,fp8,fp8,0,0.033674667278925575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,2,128,1,float16,fp8,0,0.03425599883000056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,4,128,1,float16,float16,0,0.035045333206653595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,1,128,1,float16,fp8,0,0.03396799912055334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,4,128,1,float16,fp8,0,0.0354666660229365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,4,128,1,fp8,fp8,0,0.03510933369398117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,2,128,1,fp8,fp8,0,0.03396799912055334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,8,128,1,float16,float16,0,0.035045333206653595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,8,128,1,float16,fp8,0,0.03514133393764496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,24,8,128,1,fp8,fp8,0,0.035317334036032356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,24,128,1,float16,float16,0,0.029850666721661884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,24,128,1,fp8,fp8,0,0.027823999524116516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,1,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,24,128,1,float16,fp8,0,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,1,128,1,fp8,fp8,0,0.026501332720120747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,2,128,1,float16,float16,0,0.027727998793125153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,1,128,1,float16,fp8,0,0.027701333165168762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,2,128,1,float16,fp8,0,0.02792000025510788
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,2,128,1,fp8,fp8,0,0.026693334182103474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,4,128,1,float16,float16,0,0.028165332973003387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,4,128,1,float16,fp8,0,0.02850666642189026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,4,128,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,8,128,1,float16,fp8,0,0.028938665986061096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,8,128,1,float16,float16,0,0.028234665592511494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,24,8,128,1,fp8,fp8,0,0.027615999182065327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,1,128,1,float16,float16,0,0.9730292956034342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,1,128,1,float16,fp8,0,0.9673546950022379
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,1,128,1,fp8,fp8,0,0.9432319800059
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,2,128,1,float16,fp8,0,1.008992036183675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,2,128,1,float16,float16,0,1.0224586327870686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,2,128,1,fp8,fp8,0,0.9824106693267822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,4,128,1,float16,float16,0,1.0846933523813884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,4,128,1,float16,fp8,0,1.0698026816050212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,1,128,1,float16,float16,0,0.5002346833546957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,4,128,1,fp8,fp8,0,1.1084907054901123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,24,128,1,float16,float16,0,0.6186879873275757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,8,128,1,float16,float16,0,1.0920426845550537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,8,128,1,float16,fp8,0,1.0977439880371094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,24,128,1,float16,fp8,0,0.6026506821314493
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,24,8,128,1,fp8,fp8,0,1.1279093424479167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,24,128,1,fp8,fp8,0,0.5787733395894369
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,1,128,1,float16,fp8,0,0.4971253474553426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,1,128,1,fp8,fp8,0,0.47419198354085285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,2,128,1,float16,float16,0,0.5131093263626099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,2,128,1,fp8,fp8,0,0.4995466470718384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,2,128,1,float16,fp8,0,0.5052853425343832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,4,128,1,float16,fp8,0,0.5363466739654541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,4,128,1,float16,float16,0,0.5419146617253622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,4,128,1,fp8,fp8,0,0.5646666685740153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,8,128,1,float16,float16,0,0.5507946809132894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,8,128,1,float16,fp8,0,0.5450133482615153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,1,128,1,float16,fp8,0,0.2595040003458659
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,1,128,1,float16,float16,0,0.26055999596913654
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,24,128,1,float16,float16,0,0.31780799229939777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,24,128,1,float16,fp8,0,0.30929599205652875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,24,8,128,1,fp8,fp8,0,0.5719786485036215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,24,128,1,fp8,fp8,0,0.2992960015932719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,1,128,1,fp8,fp8,0,0.24830400943756104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,2,128,1,float16,float16,0,0.2666933337847392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,2,128,1,float16,fp8,0,0.2644960085550944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,2,128,1,fp8,fp8,0,0.2568693359692891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,4,128,1,float16,float16,0,0.27780266602834064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,4,128,1,fp8,fp8,0,0.29337600866953534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,4,128,1,float16,fp8,0,0.27462400992711383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,8,128,1,float16,float16,0,0.28068800767262775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,8,128,1,fp8,fp8,0,0.2954933245976766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,24,8,128,1,float16,fp8,0,0.2772960066795349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,1,128,1,float16,fp8,0,0.13963733116785684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,1,128,1,float16,float16,0,0.13981866836547852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,24,128,1,float16,float16,0,0.16928533713022867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,24,128,1,float16,fp8,0,0.16451733311017355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,24,128,1,fp8,fp8,0,0.15889066457748413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,1,128,1,fp8,fp8,0,0.13500799735387167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,2,128,1,float16,float16,0,0.14231466253598532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,2,128,1,fp8,fp8,0,0.13665067156155905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,4,128,1,float16,float16,0,0.14803199966748556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,2,128,1,float16,fp8,0,0.14196266730626425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,4,128,1,float16,fp8,0,0.14647466937700906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,4,128,1,fp8,fp8,0,0.15282666683197021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,8,128,1,float16,float16,0,0.150736004114151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,8,128,1,float16,fp8,0,0.14899200201034546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,1,128,1,float16,float16,0,0.08024533092975616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,1,128,1,float16,fp8,0,0.08010133107503255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,24,8,128,1,fp8,fp8,0,0.15774400035540262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,24,128,1,float16,float16,0,0.10028266906738281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,24,128,1,float16,fp8,0,0.10051199793815613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,24,128,1,fp8,fp8,0,0.08890133102734883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,1,128,1,fp8,fp8,0,0.07561600208282471
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,2,128,1,float16,fp8,0,0.08145600060621898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,2,128,1,float16,float16,0,0.08123200138409932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,2,128,1,fp8,fp8,0,0.07740266621112823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,4,128,1,float16,fp8,0,0.08411733309427898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,4,128,1,float16,float16,0,0.08397333820660909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,4,128,1,fp8,fp8,0,0.08627200126647949
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,8,128,1,float16,fp8,0,0.08489599823951721
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,8,128,1,float16,float16,0,0.08641067147254944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,1,128,1,float16,float16,0,0.04684799909591675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,24,8,128,1,fp8,fp8,0,0.08571733037630717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,24,128,1,float16,float16,0,0.055957332253456116
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,24,128,1,float16,fp8,0,0.054586668809254967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,1,128,1,float16,fp8,0,0.04683200021584829
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,24,128,1,fp8,fp8,0,0.0531626691420873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,1,128,1,fp8,fp8,0,0.042805333932240806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,2,128,1,float16,fp8,0,0.047653332352638245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,2,128,1,float16,float16,0,0.04753600060939789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,4,128,1,float16,float16,0,0.049312000473340355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,4,128,1,float16,fp8,0,0.048954665660858154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,4,128,1,fp8,fp8,0,0.048026666045188904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,8,128,1,float16,float16,0,0.05034666756788889
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,2,128,1,fp8,fp8,0,0.043605332573254905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,8,128,1,float16,fp8,0,0.04914666712284088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,24,8,128,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,1,128,1,float16,fp8,0,0.03306666761636734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,24,128,1,float16,float16,0,0.035904000202814736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,1,128,1,float16,float16,0,0.033002667129039764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,24,128,1,fp8,fp8,0,0.034287999073664345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,24,128,1,float16,fp8,0,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,1,128,1,fp8,fp8,0,0.03029866764942805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,2,128,1,float16,float16,0,0.03289599965016047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,2,128,1,float16,fp8,0,0.033285332222779594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,2,128,1,fp8,fp8,0,0.031125334401925404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,4,128,1,float16,fp8,0,0.03357866654793421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,4,128,1,float16,float16,0,0.033759998778502144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,4,128,1,fp8,fp8,0,0.032831999162832894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,8,128,1,float16,float16,0,0.03403199960788091
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,8,128,1,float16,fp8,0,0.03409066547950109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,24,8,128,1,fp8,fp8,0,0.032511999209721885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,24,128,1,float16,float16,0,0.025781333446502686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,24,128,1,float16,fp8,0,0.026314665873845417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,1,128,1,float16,float16,0,0.024432001014550526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,24,128,1,fp8,fp8,0,0.024853333830833435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,1,128,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,2,128,1,float16,float16,0,0.02462933212518692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,2,128,1,float16,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,1,128,1,float16,fp8,0,0.02455466737349828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,2,128,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,4,128,1,float16,float16,0,0.025055999557177227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,4,128,1,fp8,fp8,0,0.023973333338896435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,4,128,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,8,128,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,8,128,1,float16,float16,0,0.02532266577084859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,24,8,128,1,fp8,fp8,0,0.02462399999300639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,24,128,1,float16,fp8,0,0.02186666677395503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,1,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,24,128,1,fp8,fp8,0,0.020629333953062694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,1,128,1,float16,fp8,0,0.021509334444999695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,24,128,1,float16,float16,0,0.021776000658671062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,2,128,1,float16,fp8,0,0.021829334398110706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,1,128,1,fp8,fp8,0,0.020288000504175823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,2,128,1,float16,float16,0,0.021386665602525074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,4,128,1,float16,float16,0,0.021231998999913532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,2,128,1,fp8,fp8,0,0.020090666910012562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,4,128,1,float16,fp8,0,0.022106667359670002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,4,128,1,fp8,fp8,0,0.020266667008399963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,8,128,1,float16,fp8,0,0.021669333179791767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,8,128,1,fp8,fp8,0,0.02042666698495547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,24,8,128,1,float16,float16,0,0.021525333325068157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,1,128,1,float16,fp8,0,0.4535200198491414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,1,128,1,float16,float16,0,0.45098666350046795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,1,128,1,fp8,fp8,0,0.470085342725118
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,2,128,1,float16,float16,0,0.46129600207010907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,2,128,1,fp8,fp8,0,0.49741868178049725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,2,128,1,float16,fp8,0,0.4609493414560954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,4,128,1,float16,float16,0,0.5001333157221476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,4,128,1,float16,fp8,0,0.49768535296122235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,4,128,1,fp8,fp8,0,0.5651306708653768
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,1,128,1,float16,float16,0,0.23803200324376425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,24,128,1,float16,float16,0,0.30802132685979206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,8,128,1,fp8,fp8,0,0.5716106494267782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,8,128,1,float16,fp8,0,0.5007199843724569
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,24,128,1,fp8,fp8,0,0.29798932870229083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,24,128,1,float16,fp8,0,0.3115413387616475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,24,8,128,1,float16,float16,0,0.5025440057118734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,1,128,1,float16,fp8,0,0.24041599035263062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,1,128,1,fp8,fp8,0,0.2449386715888977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,2,128,1,float16,float16,0,0.24343466758728027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,2,128,1,float16,fp8,0,0.24154132604599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,2,128,1,fp8,fp8,0,0.2653973301251729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,4,128,1,float16,float16,0,0.2576106588045756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,4,128,1,float16,fp8,0,0.255349338054657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,4,128,1,fp8,fp8,0,0.2934719920158386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,8,128,1,float16,float16,0,0.2617866595586141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,8,128,1,float16,fp8,0,0.25592533747355145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,1,128,1,float16,float16,0,0.1306880017121633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,24,128,1,float16,float16,0,0.16913066307703653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,24,128,1,float16,fp8,0,0.16301332910855612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,24,8,128,1,fp8,fp8,0,0.295199990272522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,1,128,1,fp8,fp8,0,0.1341813306013743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,1,128,1,float16,fp8,0,0.1302720010280609
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,24,128,1,fp8,fp8,0,0.16037866473197937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,2,128,1,fp8,fp8,0,0.13621866703033447
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,2,128,1,float16,float16,0,0.13218667109807333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,2,128,1,float16,fp8,0,0.13346667091051737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,4,128,1,float16,float16,0,0.13869866728782654
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,4,128,1,float16,fp8,0,0.13727999726931253
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,4,128,1,fp8,fp8,0,0.15286399920781454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,8,128,1,float16,float16,0,0.14295466740926108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,8,128,1,float16,fp8,0,0.140255997578303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,1,128,1,float16,float16,0,0.07417599856853485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,1,128,1,float16,fp8,0,0.07495999832948048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,24,128,1,float16,float16,0,0.09482666850090027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,24,8,128,1,fp8,fp8,0,0.15595733126004538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,24,128,1,float16,fp8,0,0.0920799970626831
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,24,128,1,fp8,fp8,0,0.08938666184743245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,1,128,1,fp8,fp8,0,0.07539733250935872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,2,128,1,float16,float16,0,0.0764213353395462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,2,128,1,fp8,fp8,0,0.0773226668437322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,2,128,1,float16,fp8,0,0.07653866708278656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,4,128,1,float16,float16,0,0.0802400012811025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,4,128,1,float16,fp8,0,0.07939200103282928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,4,128,1,fp8,fp8,0,0.08463467160860698
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,8,128,1,float16,float16,0,0.08096533517042796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,8,128,1,float16,fp8,0,0.08094933132330577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,24,8,128,1,fp8,fp8,0,0.08457066615422566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,1,128,1,float16,float16,0,0.044069334864616394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,24,128,1,float16,float16,0,0.053029333551724754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,24,128,1,float16,fp8,0,0.05177066723505656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,1,128,1,float16,fp8,0,0.044533332188924156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,24,128,1,fp8,fp8,0,0.05306666592756907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,1,128,1,fp8,fp8,0,0.04310933252175649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,2,128,1,float16,float16,0,0.04499199986457825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,2,128,1,float16,fp8,0,0.04490133126576742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,2,128,1,fp8,fp8,0,0.044362664222717285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,4,128,1,float16,float16,0,0.04621866842110952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,4,128,1,float16,fp8,0,0.04642133414745331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,4,128,1,fp8,fp8,0,0.04806933303674062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,8,128,1,float16,float16,0,0.04702933132648468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,8,128,1,float16,fp8,0,0.047354668378829956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,24,8,128,1,fp8,fp8,0,0.04902400076389313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,24,128,1,float16,float16,0,0.034234667817751564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,24,128,1,float16,fp8,0,0.03421333432197571
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,24,128,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,1,128,1,float16,float16,0,0.031146667897701263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,1,128,1,float16,fp8,0,0.03170666595300039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,1,128,1,fp8,fp8,0,0.03090133269627889
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,2,128,1,float16,float16,0,0.031856000423431396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,2,128,1,float16,fp8,0,0.0321066677570343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,2,128,1,fp8,fp8,0,0.030794667700926464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,4,128,1,float16,float16,0,0.03266666581233343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,4,128,1,float16,fp8,0,0.03268266717592875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,4,128,1,fp8,fp8,0,0.03222399950027466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,8,128,1,float16,float16,0,0.03256533294916153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,8,128,1,fp8,fp8,0,0.03260799994071325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,24,128,1,float16,fp8,0,0.02470933397610982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,24,8,128,1,float16,fp8,0,0.0330079992612203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,24,128,1,float16,float16,0,0.025013332565625507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,1,128,1,float16,float16,0,0.02347733328739802
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,24,128,1,fp8,fp8,0,0.02465066562096278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,1,128,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,2,128,1,float16,float16,0,0.02362666775782903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,1,128,1,fp8,fp8,0,0.023024000227451324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,2,128,1,float16,fp8,0,0.02380799998839696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,4,128,1,float16,fp8,0,0.0242399995525678
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,8,128,1,float16,float16,0,0.024277334411938984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,4,128,1,fp8,fp8,0,0.024703999360402424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,4,128,1,float16,float16,0,0.024010665714740753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,8,128,1,float16,fp8,0,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,2,128,1,fp8,fp8,0,0.02363733450571696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,24,8,128,1,fp8,fp8,0,0.024586667617162068
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,24,128,1,float16,float16,0,0.019487999379634857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,24,128,1,float16,fp8,0,0.02022933339079221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,24,128,1,fp8,fp8,0,0.020074666788180668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,1,128,1,float16,float16,0,0.020015999674797058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,1,128,1,float16,fp8,0,0.019909333437681198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,1,128,1,fp8,fp8,0,0.01987733319401741
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,2,128,1,float16,float16,0,0.020074666788180668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,2,128,1,float16,fp8,0,0.02004266654451688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,2,128,1,fp8,fp8,0,0.019765333582957584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,4,128,1,float16,float16,0,0.01982933282852173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,4,128,1,float16,fp8,0,0.01993600030740102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,4,128,1,fp8,fp8,0,0.020293333878119785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,8,128,1,float16,fp8,0,0.020224000016848247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,8,128,1,float16,float16,0,0.019760000209013622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,24,8,128,1,fp8,fp8,0,0.020501332978407543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,24,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,24,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,1,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,24,128,1,float16,float16,0,0.018725333114465077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,1,128,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,1,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,2,128,1,float16,float16,0,0.018858666221300762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,2,128,1,float16,fp8,0,0.01960533360640208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,2,128,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,4,128,1,float16,float16,0,0.019061333189407986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,4,128,1,float16,fp8,0,0.019653332730134327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,4,128,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,8,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,8,128,1,float16,fp8,0,0.019530666371186573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,24,8,128,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,1,128,1,fp8,fp8,0,0.3257333238919576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,1,128,1,float16,fp8,0,0.2985173265139262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,1,128,1,float16,float16,0,0.29782400528589886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,2,128,1,float16,float16,0,0.3010666569073995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,2,128,1,fp8,fp8,0,0.33701332410176593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,2,128,1,float16,fp8,0,0.3004586696624756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,4,128,1,float16,float16,0,0.3141760031382243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,4,128,1,float16,fp8,0,0.31537065903345746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,4,128,1,fp8,fp8,0,0.37698666254679364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,8,128,1,float16,float16,0,0.3158506751060486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,1,128,1,float16,float16,0,0.15797866384188333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,24,128,1,float16,float16,0,0.17870932817459106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,8,128,1,fp8,fp8,0,0.37988801797231037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,24,8,128,1,float16,fp8,0,0.3144373297691345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,24,128,1,float16,fp8,0,0.1755626598993937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,24,128,1,fp8,fp8,0,0.2003306746482849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,1,128,1,float16,fp8,0,0.15920533736546835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,1,128,1,fp8,fp8,0,0.1721386710802714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,2,128,1,float16,fp8,0,0.15948800245920816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,2,128,1,float16,float16,0,0.16009599963823953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,2,128,1,fp8,fp8,0,0.17662400007247925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,4,128,1,fp8,fp8,0,0.19165867567062378
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,4,128,1,float16,float16,0,0.1657439966996511
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,4,128,1,float16,fp8,0,0.16532267133394876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,8,128,1,float16,float16,0,0.16704533497492471
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,8,128,1,float16,fp8,0,0.1662399967511495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,1,128,1,float16,float16,0,0.08776000142097473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,1,128,1,float16,fp8,0,0.08860799670219421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,24,8,128,1,fp8,fp8,0,0.19591999053955078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,24,128,1,float16,float16,0,0.09705600142478943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,24,128,1,float16,fp8,0,0.09475732843081157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,24,128,1,fp8,fp8,0,0.10845333337783813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,1,128,1,fp8,fp8,0,0.09603733817736308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,2,128,1,float16,float16,0,0.08898666501045227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,2,128,1,float16,fp8,0,0.08930133779843648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,2,128,1,fp8,fp8,0,0.09745599826176961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,4,128,1,float16,float16,0,0.09220799803733826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,8,128,1,float16,float16,0,0.09297600388526917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,4,128,1,fp8,fp8,0,0.10517866412798564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,4,128,1,float16,fp8,0,0.09159466624259949
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,8,128,1,float16,fp8,0,0.09292266766230266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,24,8,128,1,fp8,fp8,0,0.1051680048306783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,24,128,1,fp8,fp8,0,0.0632479985555013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,24,128,1,float16,float16,0,0.056330665946006775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,24,128,1,float16,fp8,0,0.05433600147565206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,1,128,1,float16,float16,0,0.050714666644732155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,1,128,1,float16,fp8,0,0.05054933329423269
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,2,128,1,float16,float16,0,0.05097066859404246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,1,128,1,fp8,fp8,0,0.054287999868392944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,2,128,1,float16,fp8,0,0.05106133222579956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,2,128,1,fp8,fp8,0,0.05487466851870219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,4,128,1,float16,float16,0,0.053029333551724754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,4,128,1,float16,fp8,0,0.05319466690222422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,4,128,1,fp8,fp8,0,0.0582239975531896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,8,128,1,float16,float16,0,0.05319466690222422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,8,128,1,float16,fp8,0,0.05333333214124044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,24,128,1,float16,float16,0,0.03330666571855545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,1,128,1,float16,float16,0,0.032127998769283295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,1,128,1,float16,fp8,0,0.03236799935499827
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,24,128,1,float16,fp8,0,0.03299733251333237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,24,128,1,fp8,fp8,0,0.03639466563860575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,1,128,1,fp8,fp8,0,0.034416000048319496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,24,8,128,1,fp8,fp8,0,0.060602664947509766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,2,128,1,float16,float16,0,0.03252266595760981
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,2,128,1,float16,fp8,0,0.03275733441114426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,4,128,1,float16,float16,0,0.0334346666932106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,2,128,1,fp8,fp8,0,0.03470933437347412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,4,128,1,fp8,fp8,0,0.03625066578388214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,4,128,1,float16,fp8,0,0.033759998778502144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,8,128,1,float16,float16,0,0.03367999941110611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,8,128,1,float16,fp8,0,0.03399466723203659
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,24,8,128,1,fp8,fp8,0,0.03654933224121729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,24,128,1,float16,float16,0,0.026549334327379864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,24,128,1,fp8,fp8,0,0.02871999889612198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,1,128,1,float16,fp8,0,0.026261332134405773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,1,128,1,float16,float16,0,0.02603200078010559
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,24,128,1,float16,fp8,0,0.026816000541051228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,1,128,1,fp8,fp8,0,0.026954665780067444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,2,128,1,float16,fp8,0,0.025797332326571148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,2,128,1,fp8,fp8,0,0.02735999971628189
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,4,128,1,float16,float16,0,0.026746665438016255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,4,128,1,float16,fp8,0,0.02698666602373123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,8,128,1,float16,float16,0,0.026533332963784535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,8,128,1,float16,fp8,0,0.026911998788515728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,2,128,1,float16,float16,0,0.025973332424958546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,4,128,1,fp8,fp8,0,0.02852799991766612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,24,8,128,1,fp8,fp8,0,0.028922667105992634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,24,128,1,float16,fp8,0,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,24,128,1,float16,float16,0,0.019199999670187633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,24,128,1,fp8,fp8,0,0.02063999945918719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,1,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,1,128,1,float16,fp8,0,0.018735999862353008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,2,128,1,float16,float16,0,0.018842666099468868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,2,128,1,float16,fp8,0,0.019509332875410717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,1,128,1,fp8,fp8,0,0.020069333414236706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,2,128,1,fp8,fp8,0,0.019808000574509304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,4,128,1,float16,float16,0,0.018592000007629395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,4,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,4,128,1,fp8,fp8,0,0.02000533292690913
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,8,128,1,float16,float16,0,0.018965333700180054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,8,128,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,24,8,128,1,fp8,fp8,0,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,24,128,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,1,128,1,float16,float16,0,0.01810666670401891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,24,128,1,float16,fp8,0,0.018207999567190807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,1,128,1,float16,fp8,0,0.0184906671444575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,24,128,1,float16,float16,0,0.018063999712467194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,1,128,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,2,128,1,float16,float16,0,0.01825599993268649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,2,128,1,float16,fp8,0,0.018288000176350277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,4,128,1,float16,float16,0,0.018405333161354065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,2,128,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,4,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,4,128,1,float16,fp8,0,0.018378666291634243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,8,128,1,float16,fp8,0,0.018458666900793713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,8,128,1,float16,float16,0,0.01815466706951459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,24,8,128,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,24,128,1,float16,fp8,0,0.017903999735911686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,1,128,1,float16,float16,0,0.017514667163292568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,24,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,1,128,1,float16,fp8,0,0.017808000246683758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,24,128,1,fp8,fp8,0,0.01800000046690305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,1,128,1,fp8,fp8,0,0.01836266616980235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,2,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,2,128,1,float16,fp8,0,0.018053332964579265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,2,128,1,fp8,fp8,0,0.018543999642133713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,4,128,1,float16,float16,0,0.01729600007335345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,4,128,1,float16,fp8,0,0.01766933376590411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,4,128,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,8,128,1,float16,float16,0,0.0174346665541331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,8,128,1,float16,fp8,0,0.017840000490347546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,24,8,128,1,fp8,fp8,0,0.01829333355029424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,1,128,1,float16,fp8,0,0.2071359952290853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,1,128,1,fp8,fp8,0,0.2622133294741313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,1,128,1,float16,float16,0,0.20650132497151694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,2,128,1,float16,float16,0,0.2097973426183065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,2,128,1,float16,fp8,0,0.20996799071629843
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,2,128,1,fp8,fp8,0,0.2639946738878886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,4,128,1,float16,float16,0,0.2184106707572937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,4,128,1,float16,fp8,0,0.21700799465179443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,4,128,1,fp8,fp8,0,0.2815946737925212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,8,128,1,float16,float16,0,0.22125866015752158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,24,128,1,float16,float16,0,0.12301333745320638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,8,128,1,fp8,fp8,0,0.28595199187596637
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,24,128,1,float16,fp8,0,0.12052266796429952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,24,8,128,1,float16,fp8,0,0.21975467602411905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,24,128,1,fp8,fp8,0,0.15610133608182272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,1,128,1,float16,float16,0,0.11026666561762492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,1,128,1,float16,fp8,0,0.11053333679835002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,1,128,1,fp8,fp8,0,0.1426346699396769
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,2,128,1,float16,float16,0,0.11175466577212016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,2,128,1,fp8,fp8,0,0.14446399609247842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,2,128,1,float16,fp8,0,0.11172800262769063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,4,128,1,float16,float16,0,0.11598400274912517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,4,128,1,float16,fp8,0,0.1151039997736613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,8,128,1,float16,float16,0,0.11820800105730693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,4,128,1,fp8,fp8,0,0.1523306667804718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,8,128,1,float16,fp8,0,0.11731200416882832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,24,8,128,1,fp8,fp8,0,0.15226667126019797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,1,128,1,float16,float16,0,0.06026133398214976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,24,128,1,fp8,fp8,0,0.08674133817354839
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,24,128,1,float16,fp8,0,0.06637866795063019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,1,128,1,float16,fp8,0,0.06106133262316386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,24,128,1,float16,float16,0,0.06726400057474773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,2,128,1,float16,float16,0,0.06137066582838694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,2,128,1,fp8,fp8,0,0.07820799946784973
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,1,128,1,fp8,fp8,0,0.07750399907430013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,4,128,1,float16,fp8,0,0.06371733546257019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,4,128,1,fp8,fp8,0,0.08270933230717976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,2,128,1,float16,fp8,0,0.06202666461467743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,8,128,1,float16,float16,0,0.0643093337615331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,4,128,1,float16,float16,0,0.06261333326498668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,8,128,1,float16,fp8,0,0.06442666550477345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,24,8,128,1,fp8,fp8,0,0.08417066931724548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,24,128,1,float16,fp8,0,0.03822399924198786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,24,128,1,fp8,fp8,0,0.04861866434415182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,24,128,1,float16,float16,0,0.038202665746212006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,2,128,1,float16,float16,0,0.038058665891488395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,1,128,1,float16,float16,0,0.037434667348861694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,1,128,1,fp8,fp8,0,0.046207999189694725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,2,128,1,float16,fp8,0,0.037871999045213066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,1,128,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,2,128,1,fp8,fp8,0,0.04717333118120829
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,4,128,1,float16,float16,0,0.03851733356714249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,4,128,1,fp8,fp8,0,0.04836266736189524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,4,128,1,float16,fp8,0,0.039349332451820374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,8,128,1,float16,float16,0,0.03865066667397817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,8,128,1,float16,fp8,0,0.03925866633653641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,24,8,128,1,fp8,fp8,0,0.048058668772379555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,1,128,1,float16,float16,0,0.026399999856948853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,24,128,1,float16,fp8,0,0.02752533306678136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,24,128,1,fp8,fp8,0,0.032773333291212715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,24,128,1,float16,float16,0,0.026911998788515728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,2,128,1,fp8,fp8,0,0.031317333380381264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,1,128,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,2,128,1,float16,float16,0,0.027242665489514668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,2,128,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,4,128,1,float16,float16,0,0.026928000152111053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,1,128,1,float16,fp8,0,0.026975999275843304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,4,128,1,float16,fp8,0,0.027808000644048054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,8,128,1,float16,float16,0,0.02762666592995326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,8,128,1,fp8,fp8,0,0.03316266586383184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,8,128,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,24,128,1,float16,float16,0,0.02146666745344798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,24,128,1,float16,fp8,0,0.021536000072956085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,24,4,128,1,fp8,fp8,0,0.032511999209721885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,1,128,1,float16,float16,0,0.02145066608985265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,1,128,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,24,128,1,fp8,fp8,0,0.024373332659403484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,1,128,1,fp8,fp8,0,0.02380266785621643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,2,128,1,float16,float16,0,0.020975999534130096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,2,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,2,128,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,4,128,1,float16,fp8,0,0.021925332645575207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,4,128,1,fp8,fp8,0,0.023957334458827972
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,4,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,8,128,1,float16,fp8,0,0.021594665944576263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,1,128,1,float16,float16,0,0.01738133281469345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,8,128,1,fp8,fp8,0,0.024325333535671234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,24,8,128,1,float16,float16,0,0.02718399961789449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,24,128,1,fp8,fp8,0,0.019904000063737232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,24,128,1,float16,fp8,0,0.01809599995613098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,24,128,1,float16,float16,0,0.01743999992807706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,2,128,1,float16,fp8,0,0.017968000223239262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,1,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,4,128,1,float16,fp8,0,0.018229333062966663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,4,128,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,2,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,1,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,2,128,1,float16,float16,0,0.017423999806245167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,4,128,1,float16,float16,0,0.017583999782800674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,8,128,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,24,128,1,float16,float16,0,0.016389333953460056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,8,128,1,float16,fp8,0,0.018239999810854595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,24,8,128,1,fp8,fp8,0,0.019978666057189304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,1,128,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,1,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,24,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,1,128,1,fp8,fp8,0,0.018239999810854595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,24,128,1,fp8,fp8,0,0.018565333137909572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,4,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,2,128,1,fp8,fp8,0,0.01836266616980235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,4,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,2,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,4,128,1,fp8,fp8,0,0.01836799954374631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,8,128,1,float16,float16,0,0.01681600014368693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,24,128,1,float16,float16,0,0.016176000237464905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,8,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,24,128,1,fp8,fp8,0,0.01803733284274737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,24,128,1,float16,fp8,0,0.016544000556071598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,24,8,128,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,1,128,1,float16,float16,0,0.016783999900023144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,2,128,1,fp8,fp8,0,0.01828266680240631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,2,128,1,float16,float16,0,0.01640533283352852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,1,128,1,fp8,fp8,0,0.01801066721479098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,1,128,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,2,128,1,float16,fp8,0,0.01657066618402799
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,4,128,1,float16,float16,0,0.016352000335852306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,4,128,1,fp8,fp8,0,0.01848000039656957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,4,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,8,128,1,float16,float16,0,0.01674666628241539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,8,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,24,8,128,1,fp8,fp8,0,0.018250666558742523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,1,128,1,float16,fp8,0,0.1739573280016581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,1,128,1,float16,float16,0,0.17368000745773315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,1,128,1,fp8,fp8,0,0.23170133431752524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,2,128,1,float16,float16,0,0.17568532625834146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,2,128,1,fp8,fp8,0,0.23431466023127237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,4,128,1,float16,float16,0,0.17915733655293783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,4,128,1,float16,fp8,0,0.17852266629536948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,2,128,1,float16,fp8,0,0.1751413345336914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,4,128,1,fp8,fp8,0,0.24285332361857095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,8,128,1,float16,fp8,0,0.18056533734003702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,24,128,1,float16,float16,0,0.09399466713269551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,8,128,1,float16,float16,0,0.18163732687632242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,24,8,128,1,fp8,fp8,0,0.24346667528152466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,24,128,1,float16,fp8,0,0.09363733728726704
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,1,128,1,float16,float16,0,0.09353599945704143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,1,128,1,float16,fp8,0,0.09340799848238628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,1,128,1,fp8,fp8,0,0.12494400143623352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,24,128,1,fp8,fp8,0,0.1328213314215342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,2,128,1,float16,float16,0,0.09367466966311137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,2,128,1,fp8,fp8,0,0.12569066882133484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,4,128,1,float16,float16,0,0.09661333759625752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,2,128,1,float16,fp8,0,0.0944106678167979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,4,128,1,fp8,fp8,0,0.1295093297958374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,4,128,1,float16,fp8,0,0.09573866923650105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,8,128,1,float16,float16,0,0.09711999694506328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,8,128,1,fp8,fp8,0,0.13014400005340576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,24,8,128,1,float16,fp8,0,0.09635200103123982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,24,128,1,float16,float16,0,0.05299200117588043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,24,128,1,float16,fp8,0,0.052576000491778054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,1,128,1,float16,float16,0,0.05358933409055074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,24,128,1,fp8,fp8,0,0.07264000177383423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,1,128,1,fp8,fp8,0,0.07076799869537354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,2,128,1,float16,float16,0,0.05403199791908264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,2,128,1,float16,fp8,0,0.054527997970581055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,1,128,1,float16,fp8,0,0.05417599777380625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,4,128,1,float16,float16,0,0.05489066739877065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,2,128,1,fp8,fp8,0,0.07095466554164886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,4,128,1,float16,fp8,0,0.05465066432952881
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,24,128,1,float16,fp8,0,0.03472533325354258
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,8,128,1,float16,float16,0,0.055530667304992676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,4,128,1,fp8,fp8,0,0.07212266822655995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,8,128,1,float16,fp8,0,0.05509866774082184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,24,8,128,1,fp8,fp8,0,0.07248533268769582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,24,128,1,float16,float16,0,0.03418133407831192
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,24,128,1,fp8,fp8,0,0.04471466441949209
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,1,128,1,float16,fp8,0,0.03501333296298981
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,1,128,1,float16,float16,0,0.035216001172860466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,1,128,1,fp8,fp8,0,0.04354666670163473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,2,128,1,float16,fp8,0,0.03547733277082443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,2,128,1,fp8,fp8,0,0.04368533194065094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,2,128,1,float16,float16,0,0.035402665535608925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,4,128,1,float16,fp8,0,0.036677333215872444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,4,128,1,fp8,fp8,0,0.0450186679760615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,4,128,1,float16,float16,0,0.035605333745479584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,8,128,1,float16,float16,0,0.03632533301909765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,8,128,1,fp8,fp8,0,0.04480533301830292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,24,8,128,1,float16,fp8,0,0.03629866739114126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,24,128,1,float16,float16,0,0.02362666775782903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,1,128,1,float16,float16,0,0.02418133368094762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,24,128,1,fp8,fp8,0,0.028949332733949024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,24,128,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,2,128,1,float16,float16,0,0.024362665911515553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,1,128,1,fp8,fp8,0,0.027984000742435455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,2,128,1,float16,fp8,0,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,1,128,1,float16,fp8,0,0.024522667129834492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,2,128,1,fp8,fp8,0,0.028618666032950085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,4,128,1,float16,float16,0,0.024522667129834492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,4,128,1,float16,fp8,0,0.024826665719350178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,8,128,1,float16,float16,0,0.024298667907714844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,8,128,1,float16,fp8,0,0.0245919997493426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,8,128,1,fp8,fp8,0,0.02855466554562251
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,24,4,128,1,fp8,fp8,0,0.02849599967400233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,24,128,1,fp8,fp8,0,0.023711999257405598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,24,128,1,float16,fp8,0,0.02070933332045873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,24,128,1,float16,float16,0,0.020266667008399963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,1,128,1,float16,float16,0,0.02086399992307027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,1,128,1,float16,fp8,0,0.020266667008399963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,2,128,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,1,128,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,2,128,1,float16,float16,0,0.020309332758188248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,2,128,1,float16,fp8,0,0.02021866664290428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,8,128,1,float16,float16,0,0.02035733312368393
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,4,128,1,float16,float16,0,0.020453333854675293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,4,128,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,4,128,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,8,128,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,24,8,128,1,fp8,fp8,0,0.02350933353106181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,24,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,24,128,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,1,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,1,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,2,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,24,128,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,1,128,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,2,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,2,128,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,4,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,4,128,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,4,128,1,fp8,fp8,0,0.028192001084486645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,8,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,24,128,1,float16,float16,0,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,24,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,24,128,1,fp8,fp8,0,0.01836799954374631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,8,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,1,128,1,float16,float16,0,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,1,128,1,fp8,fp8,0,0.018330667167901993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,24,8,128,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,1,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,2,128,1,fp8,fp8,0,0.01820266619324684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,2,128,1,float16,float16,0,0.01617066686352094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,2,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,4,128,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,4,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,8,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,8,128,1,fp8,fp8,0,0.01817600056529045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,8,128,1,float16,float16,0,0.016165333489576977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,24,4,128,1,float16,float16,0,0.016442666451136272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,24,128,1,float16,float16,0,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,24,128,1,float16,fp8,0,0.01651200031240781
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,24,128,1,fp8,fp8,0,0.017898666361967724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,1,128,1,float16,float16,0,0.016186666985352833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,1,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,1,128,1,fp8,fp8,0,0.018453333526849747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,2,128,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,4,128,1,float16,float16,0,0.016271999726692837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,4,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,2,128,1,float16,float16,0,0.016021333634853363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,2,128,1,float16,fp8,0,0.016688000410795212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,4,128,1,fp8,fp8,0,0.01803733284274737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,8,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,8,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,24,8,128,1,fp8,fp8,0,0.017786666750907898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,128,1,float16,fp8,0,16.599018096923828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,128,1,float16,float16,0,16.907925923665363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,128,1,fp8,fp8,0,11.002272288004557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,128,1,fp8,fp8,0,11.040847778320312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,128,1,float16,fp8,0,16.83095423380534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,128,1,float16,float16,0,16.6560796101888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,128,1,float16,float16,0,16.626490275065105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,128,1,float16,fp8,0,16.597151438395183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,128,1,float16,float16,0,8.381823857625326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,128,1,fp8,fp8,0,11.12954076131185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,128,1,float16,float16,0,8.434202829996744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,128,1,float16,fp8,0,8.269610722859701
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,128,1,float16,fp8,0,8.350325266520182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,128,1,float16,float16,0,17.15493392944336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,128,1,fp8,fp8,0,5.581066767374675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,128,1,fp8,fp8,0,11.113408406575521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,128,1,fp8,fp8,0,5.397034962972005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,128,1,float16,fp8,0,16.803632100423176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,128,1,float16,float16,0,8.461322784423828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,128,1,float16,fp8,0,8.26748275756836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,128,1,fp8,fp8,0,5.500080108642578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,128,1,float16,float16,0,8.656453450520834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,128,1,fp8,fp8,0,5.468847910563151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,128,1,float16,fp8,0,8.305573145548502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,128,1,float16,float16,0,8.421205520629883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,128,1,float16,float16,0,4.135658582051595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,128,1,float16,fp8,0,8.458784103393555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,128,1,float16,float16,0,4.072266578674316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,128,1,float16,fp8,0,4.014869372049968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,128,1,fp8,fp8,0,5.464752197265625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,128,1,float16,fp8,0,3.976469357808431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,128,1,fp8,fp8,0,2.769317309061686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,128,1,fp8,fp8,0,2.8146721522013345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,128,1,float16,float16,0,4.100218772888184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,128,1,float16,fp8,0,4.00163205464681
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,128,1,fp8,fp8,0,2.7684265772501626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,128,1,fp8,fp8,0,2.7750771840413413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,128,1,float16,fp8,0,4.137173334757487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,128,1,float16,float16,0,4.241701443990071
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,128,1,float16,float16,0,4.211152076721191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,128,1,float16,fp8,0,4.103861490885417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,128,1,float16,float16,0,2.1594826380411782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,128,1,float16,fp8,0,2.1219786008199057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,128,1,float16,float16,0,2.1141653060913086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,128,1,fp8,fp8,0,1.4906293551127117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,128,1,fp8,fp8,0,2.792090733846029
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,128,1,float16,fp8,0,2.1201653480529785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,128,1,fp8,fp8,0,1.4923839569091797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,128,1,float16,float16,0,2.1674985885620117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,128,1,float16,fp8,0,2.145685354868571
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,128,1,fp8,fp8,0,1.489770730336507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,128,1,float16,float16,0,2.142085393269857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,128,1,float16,fp8,0,2.133552074432373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,128,1,fp8,fp8,0,1.4932373364766438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,128,1,float16,float16,0,2.1636479695638022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,128,1,float16,fp8,0,2.136133352915446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,128,1,fp8,fp8,0,1.5072959264119465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,128,1,float16,float16,0,9.770565032958984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,128,1,float16,fp8,0,9.525221506754557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,128,1,fp8,fp8,0,6.3524424235026045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,128,1,float16,float16,0,9.71564801534017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,128,1,float16,fp8,0,9.60966936747233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,128,1,fp8,fp8,0,6.422149022420247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,128,1,float16,float16,0,9.632810592651367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,128,1,float16,fp8,0,9.82153574625651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,128,1,float16,float16,0,4.646880149841309
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,128,1,fp8,fp8,0,6.472581227620442
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,128,1,float16,float16,0,4.741317431131999
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,128,1,float16,float16,0,9.602058410644531
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,128,1,float16,fp8,0,9.645055770874023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,128,1,float16,fp8,0,4.865994771321614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,128,1,fp8,fp8,0,3.231578509012858
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,128,1,float16,fp8,0,4.774842580159505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,128,1,fp8,fp8,0,6.3445383707682295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,128,1,fp8,fp8,0,3.2373758951822915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,128,1,float16,float16,0,4.5835466384887695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,128,1,float16,fp8,0,4.712815920511882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,128,1,fp8,fp8,0,3.1871093114217124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,128,1,float16,float16,0,4.98035208384196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,128,1,float16,fp8,0,4.807482719421387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,128,1,fp8,fp8,0,3.2558933893839517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,128,1,float16,float16,0,4.650016148885091
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,128,1,float16,fp8,0,4.586026509602864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,128,1,float16,float16,0,2.4171679814656577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,128,1,float16,float16,0,2.3642346064249673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,128,1,fp8,fp8,0,3.229424158732096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,128,1,fp8,fp8,0,1.69650665918986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,128,1,float16,fp8,0,2.3555893898010254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,128,1,fp8,fp8,0,1.6709225972493489
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,128,1,float16,fp8,0,2.4008213678995767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,128,1,float16,float16,0,2.3841546376546225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,128,1,float16,fp8,0,2.3918986320495605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,128,1,fp8,fp8,0,1.669114589691162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,128,1,float16,float16,0,2.3905653953552246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,128,1,float16,fp8,0,2.40120538075765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,128,1,fp8,fp8,0,1.6791146596272786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,128,1,float16,float16,0,2.4038453102111816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,128,1,float16,float16,0,1.2929386297861736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,128,1,float16,fp8,0,1.2696266969045003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,128,1,fp8,fp8,0,1.693621317545573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,128,1,float16,float16,0,1.3003839651743572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,128,1,float16,fp8,0,2.398575941721598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,128,1,fp8,fp8,0,0.8699946403503418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,128,1,float16,fp8,0,1.2817546526590984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,128,1,fp8,fp8,0,0.8592693010965983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,128,1,float16,float16,0,1.3031466801961262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,128,1,fp8,fp8,0,0.8624373277028402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,128,1,float16,float16,0,1.3054827054341633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,128,1,float16,fp8,0,1.2799200216929119
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,128,1,fp8,fp8,0,0.8645439942677816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,128,1,float16,fp8,0,1.2902239958445232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,128,1,float16,float16,0,1.2989760239919026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,128,1,float16,fp8,0,1.291973352432251
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,128,1,fp8,fp8,0,0.8762719631195068
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,128,1,float16,float16,0,6.719130833943685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,128,1,float16,fp8,0,6.94001579284668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,128,1,fp8,fp8,0,4.492159843444824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,128,1,float16,float16,0,6.530266443888347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,128,1,fp8,fp8,0,4.621664047241211
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,128,1,float16,fp8,0,6.70307731628418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,128,1,float16,float16,0,6.773536046346028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,128,1,float16,fp8,0,6.997754414876302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,128,1,fp8,fp8,0,4.584752082824707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,128,1,float16,float16,0,3.4424479802449546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,128,1,float16,float16,0,3.270965258280436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,128,1,float16,float16,0,6.921072006225586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,128,1,float16,fp8,0,3.2869866689046225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,128,1,fp8,fp8,0,2.3654826482137046
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,128,1,float16,fp8,0,6.901413599650065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,128,1,fp8,fp8,0,4.599941253662109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,128,1,fp8,fp8,0,2.316826661427816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,128,1,float16,fp8,0,3.2881600062052407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,128,1,float16,float16,0,3.3357067108154297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,128,1,float16,fp8,0,3.2950401306152344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,128,1,fp8,fp8,0,2.3154773712158203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,128,1,float16,fp8,0,3.255839983622233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,128,1,fp8,fp8,0,2.32205867767334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,128,1,float16,float16,0,3.3471199671427407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,128,1,float16,float16,0,3.343407948811849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,128,1,float16,fp8,0,3.2830934524536133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,128,1,float16,float16,0,1.7198987007141113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,128,1,float16,fp8,0,1.7046079635620117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,128,1,float16,float16,0,1.7337759335835774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,128,1,fp8,fp8,0,2.35971736907959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,128,1,fp8,fp8,0,1.2246507008870442
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,128,1,fp8,fp8,0,1.246245304743449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,128,1,float16,fp8,0,1.7181545893351238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,128,1,float16,float16,0,1.7203359603881836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,128,1,float16,fp8,0,1.7048746744791667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,128,1,fp8,fp8,0,1.230234702428182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,128,1,float16,float16,0,1.7432479858398438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,128,1,float16,fp8,0,1.7100639343261719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,128,1,fp8,fp8,0,1.2356479962666829
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,128,1,float16,float16,0,0.9366400241851807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,128,1,float16,float16,0,1.7293866475423176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,128,1,fp8,fp8,0,1.24507737159729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,128,1,float16,fp8,0,0.9337653319040934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,128,1,float16,float16,0,0.9452693462371826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,128,1,float16,fp8,0,1.7303840319315593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,128,1,fp8,fp8,0,0.6471680005391439
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,128,1,float16,fp8,0,0.9427200158437093
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,128,1,fp8,fp8,0,0.632586677869161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,128,1,float16,fp8,0,0.9385013580322266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,128,1,fp8,fp8,0,0.6383573214213053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,128,1,float16,float16,0,0.9470506509145101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,128,1,float16,float16,0,0.9500106970469157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,128,1,float16,fp8,0,0.9435253143310547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,128,1,fp8,fp8,0,0.6405280033747355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,128,1,float16,float16,0,0.9590880076090494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,128,1,float16,fp8,0,0.9450026353200277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,128,1,fp8,fp8,0,0.6462826728820801
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,128,1,fp8,fp8,0,6.075375874837239
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,128,1,float16,float16,0,9.014944076538086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,128,1,float16,fp8,0,8.916901270548502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,128,1,float16,fp8,0,9.142741521199545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,128,1,float16,float16,0,9.077952067057291
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,128,1,fp8,fp8,0,6.081424077351888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,128,1,float16,float16,0,9.028826395670572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,128,1,float16,fp8,0,8.84329605102539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,128,1,float16,float16,0,4.2606401443481445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,128,1,fp8,fp8,0,6.125663757324219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,128,1,float16,float16,0,4.348741213480632
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,128,1,float16,fp8,0,4.30734920501709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,128,1,float16,float16,0,9.268202463785807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,128,1,fp8,fp8,0,6.2948424021403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,128,1,float16,fp8,0,4.344474792480469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,128,1,fp8,fp8,0,3.1219892501831055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,128,1,float16,fp8,0,8.959818522135416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,128,1,fp8,fp8,0,3.0512212117513022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,128,1,float16,float16,0,4.300405184427897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,128,1,fp8,fp8,0,3.0576534271240234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,128,1,float16,fp8,0,4.479077339172363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,128,1,fp8,fp8,0,3.0680106480916343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,128,1,float16,fp8,0,4.41428788503011
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,128,1,float16,float16,0,4.413114547729492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,128,1,float16,float16,0,4.421498616536458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,128,1,float16,fp8,0,4.410074551900228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,128,1,float16,float16,0,2.187466621398926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,128,1,float16,fp8,0,2.1925439834594727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,128,1,float16,float16,0,2.2018720308939614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,128,1,fp8,fp8,0,1.5730080604553223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,128,1,float16,fp8,0,2.2137120564778647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,128,1,fp8,fp8,0,1.6243146260579426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,128,1,float16,float16,0,2.1915359497070312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,128,1,float16,fp8,0,2.1709653536478677
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,128,1,fp8,fp8,0,1.5817653338114421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,128,1,fp8,fp8,0,3.1233812967936196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,128,1,float16,float16,0,2.201594670613607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,128,1,float16,fp8,0,2.1778079668680825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,128,1,fp8,fp8,0,1.5928746859232585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,128,1,float16,float16,0,2.206757386525472
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,128,1,float16,fp8,0,2.195903937021891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,128,1,float16,float16,0,1.1757280031840007
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,128,1,float16,fp8,0,1.1581546465555828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,128,1,float16,fp8,0,1.1660213470458984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,128,1,float16,float16,0,1.161472002665202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,128,1,fp8,fp8,0,1.6137065887451172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,128,1,fp8,fp8,0,0.87391463915507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,128,1,fp8,fp8,0,0.8448906739552816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,128,1,float16,float16,0,1.167866627375285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,128,1,float16,fp8,0,1.162618637084961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,128,1,fp8,fp8,0,0.8502079645792643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,128,1,float16,float16,0,1.1734506289164226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,128,1,fp8,fp8,0,0.855557362238566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,128,1,float16,fp8,0,1.1550933519999187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,128,1,float16,float16,0,1.1766346295674641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,128,1,float16,fp8,0,1.162496010462443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,128,1,fp8,fp8,0,0.864565372467041
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,128,1,float16,float16,0,0.6540373166402181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,128,1,float16,fp8,0,0.650053342183431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,128,1,float16,float16,0,0.6543253262837728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,128,1,fp8,fp8,0,0.44549334049224854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,128,1,float16,fp8,0,0.6447413365046183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,128,1,fp8,fp8,0,0.45476265748341876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,128,1,float16,float16,0,0.6556693315505981
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,128,1,float16,fp8,0,0.6475573380788168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,128,1,fp8,fp8,0,0.44546135266621906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,128,1,fp8,fp8,0,0.45001065731048584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,128,1,float16,float16,0,0.6603413422902426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,128,1,float16,fp8,0,0.6525760094324747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,128,1,float16,float16,0,0.6585386594136556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,128,1,float16,fp8,0,0.6553119818369547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,128,1,fp8,fp8,0,0.4511733452479045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,128,1,float16,fp8,0,5.213589350382487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,128,1,float16,float16,0,5.049301465352376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,128,1,fp8,fp8,0,3.666794776916504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,128,1,float16,float16,0,5.283077239990234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,128,1,fp8,fp8,0,3.6803468068440757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,128,1,float16,fp8,0,5.22053337097168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,128,1,float16,fp8,0,5.233541488647461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,128,1,float16,float16,0,5.277941385904948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,128,1,float16,float16,0,2.5604213078816733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,128,1,fp8,fp8,0,3.7356694539388022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,128,1,float16,float16,0,2.5741492907206216
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,128,1,float16,fp8,0,2.560805320739746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,128,1,float16,float16,0,5.155978520711263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,128,1,float16,fp8,0,5.228426615397136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,128,1,fp8,fp8,0,1.9464000066121419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,128,1,fp8,fp8,0,3.7588958740234375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,128,1,fp8,fp8,0,1.8770240147908528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,128,1,float16,fp8,0,2.5278666814168296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,128,1,float16,float16,0,2.5482986768086753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,128,1,float16,fp8,0,2.5265599886576333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,128,1,fp8,fp8,0,1.882570743560791
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,128,1,float16,float16,0,2.5670347213745117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,128,1,float16,fp8,0,2.5224639574686685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,128,1,fp8,fp8,0,1.8990933100382488
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,128,1,float16,float16,0,2.6485652923583984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,128,1,float16,float16,0,1.3218986988067627
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,128,1,float16,fp8,0,2.574458599090576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,128,1,float16,float16,0,1.3354132970174153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,128,1,float16,fp8,0,1.3182506561279297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,128,1,float16,fp8,0,1.3286720116933186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,128,1,fp8,fp8,0,1.9267892837524414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,128,1,fp8,fp8,0,1.0253760019938152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,128,1,fp8,fp8,0,0.9839253425598145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,128,1,float16,float16,0,1.3230079809824626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,128,1,float16,fp8,0,1.3162186940511067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,128,1,fp8,fp8,0,0.9855999946594238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,128,1,float16,fp8,0,1.3169013659159343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,128,1,float16,float16,0,1.3424800237019856
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,128,1,fp8,fp8,0,0.9916586875915527
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,128,1,float16,float16,0,1.3429546356201172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,128,1,float16,float16,0,0.720853328704834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,128,1,float16,fp8,0,1.343429406483968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,128,1,float16,fp8,0,0.7078666687011719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,128,1,fp8,fp8,0,1.0070292949676514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,128,1,float16,fp8,0,0.7148640155792236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,128,1,fp8,fp8,0,0.5297919909159342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,128,1,float16,float16,0,0.7233013312021891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,128,1,fp8,fp8,0,0.5063999891281128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,128,1,float16,fp8,0,0.7148586908976237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,128,1,float16,float16,0,0.7216906547546387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,128,1,fp8,fp8,0,0.5089439948399862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,128,1,float16,float16,0,0.719754695892334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,128,1,float16,fp8,0,0.7187413374582926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,128,1,fp8,fp8,0,0.5144960085550944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,128,1,float16,fp8,0,0.7192533016204834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,128,1,float16,float16,0,0.7276000181833903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,128,1,float16,float16,0,0.3622613350550334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,128,1,float16,float16,0,0.370037317276001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,128,1,float16,fp8,0,0.3578346570332845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,128,1,fp8,fp8,0,0.2939466635386149
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,128,1,fp8,fp8,0,0.5244266589482626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,128,1,float16,fp8,0,0.3630186716715495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,128,1,fp8,fp8,0,0.28549333413441974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,128,1,float16,fp8,0,0.3582719961802165
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,128,1,fp8,fp8,0,0.28706665833791095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,128,1,float16,float16,0,0.36526934305826825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,128,1,fp8,fp8,0,0.2895840009053548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,128,1,float16,float16,0,0.3651786645253499
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,128,1,float16,fp8,0,0.359605352083842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,128,1,float16,float16,0,0.3681386709213257
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,128,1,float16,fp8,0,0.3638826608657837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,128,1,fp8,fp8,0,0.29290133714675903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,128,1,float16,fp8,0,5.014645258585612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,128,1,float16,float16,0,5.013738632202148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,128,1,fp8,fp8,0,3.7463839848836265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,128,1,float16,fp8,0,4.950405438741048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,128,1,fp8,fp8,0,3.7580798467000327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,128,1,float16,float16,0,4.983914693196614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,128,1,float16,fp8,0,5.005248069763184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,128,1,float16,float16,0,5.078661282857259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,128,1,float16,float16,0,2.4808212916056314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,128,1,fp8,fp8,0,3.7975571950276694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,128,1,float16,fp8,0,2.4377172787984214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,128,1,float16,float16,0,2.5481066703796387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,128,1,float16,fp8,0,2.5323947270711265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,128,1,float16,float16,0,5.264378547668457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,128,1,float16,fp8,0,5.249637285868327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,128,1,fp8,fp8,0,2.0164693196614585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,128,1,fp8,fp8,0,1.9049386978149414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,128,1,fp8,fp8,0,3.8672800064086914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,128,1,float16,float16,0,2.495797316233317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,128,1,float16,fp8,0,2.439765294392904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,128,1,fp8,fp8,0,1.9129014015197754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,128,1,float16,float16,0,2.52127472559611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,128,1,fp8,fp8,0,1.9306027094523113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,128,1,float16,fp8,0,2.4874560038248696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,128,1,float16,float16,0,2.524885336558024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,128,1,float16,float16,0,1.2710293134053547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,128,1,float16,fp8,0,1.2589279810587566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,128,1,float16,fp8,0,2.4832480748494468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,128,1,float16,float16,0,1.296730677286784
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,128,1,fp8,fp8,0,0.9813013076782227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,128,1,fp8,fp8,0,1.9682879447937012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,128,1,float16,fp8,0,1.2994879881540935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,128,1,float16,float16,0,1.2745386759440105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,128,1,fp8,fp8,0,1.0430880387624104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,128,1,float16,fp8,0,1.2633386452992756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,128,1,fp8,fp8,0,0.9814879894256592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,128,1,float16,fp8,0,1.260965347290039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,128,1,float16,float16,0,1.277359962463379
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,128,1,fp8,fp8,0,0.9944640000661215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,128,1,float16,float16,0,0.692512035369873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,128,1,float16,fp8,0,1.2788586616516113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,128,1,float16,fp8,0,0.6789706548055013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,128,1,float16,float16,0,1.3002399603525798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,128,1,fp8,fp8,0,1.0139466921488445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,128,1,float16,fp8,0,0.6629279851913452
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,128,1,float16,float16,0,0.6744426886240641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,128,1,fp8,fp8,0,0.5245546499888102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,128,1,float16,float16,0,0.6763146718343099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,128,1,float16,fp8,0,0.6659040053685507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,128,1,fp8,fp8,0,0.5586186647415161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,128,1,float16,float16,0,0.6812160015106201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,128,1,fp8,fp8,0,0.5271893342336019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,128,1,float16,fp8,0,0.6699999968210856
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,128,1,fp8,fp8,0,0.5306133429209391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,128,1,float16,float16,0,0.6903413136800131
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,128,1,float16,fp8,0,0.6791679859161377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,128,1,float16,float16,0,0.37741867701212567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,128,1,float16,fp8,0,0.38142398993174237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,128,1,float16,float16,0,0.38411200046539307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,128,1,fp8,fp8,0,0.5409973462422689
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,128,1,fp8,fp8,0,0.2937333385149638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,128,1,fp8,fp8,0,0.27718933423360187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,128,1,float16,fp8,0,0.3712906837463379
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,128,1,float16,float16,0,0.380351980527242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,128,1,fp8,fp8,0,0.27752000093460083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,128,1,float16,fp8,0,0.3729226589202881
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,128,1,float16,float16,0,0.3814186652501424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,128,1,float16,fp8,0,0.3752853473027547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,128,1,float16,float16,0,0.385696013768514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,128,1,fp8,fp8,0,0.28109333912531537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,128,1,float16,float16,0,0.19910399119059244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,128,1,fp8,fp8,0,0.2870453397432963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,128,1,float16,fp8,0,0.19647467136383057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,128,1,float16,float16,0,0.19195733467737833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,128,1,float16,fp8,0,0.3798133134841919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,128,1,fp8,fp8,0,0.16955200831095377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,128,1,float16,fp8,0,0.19061867396036783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,128,1,float16,float16,0,0.19302932421366373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,128,1,fp8,fp8,0,0.15923200050989786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,128,1,float16,fp8,0,0.19040000438690186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,128,1,float16,float16,0,0.19401599963506064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,128,1,fp8,fp8,0,0.15958399573961893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,128,1,fp8,fp8,0,0.1632960041364034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,128,1,float16,fp8,0,0.19161067406336466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,128,1,float16,float16,0,0.19762667020161948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,128,1,float16,fp8,0,0.19352000951766968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,128,1,fp8,fp8,0,0.1662453313668569
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,128,1,float16,fp8,0,2.96126397450765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,128,1,float16,float16,0,3.030437469482422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,128,1,fp8,fp8,0,2.4016745885213218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,128,1,float16,float16,0,3.0425332387288413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,128,1,fp8,fp8,0,2.420351982116699
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,128,1,float16,fp8,0,2.9741811752319336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,128,1,float16,float16,0,3.1048692067464194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,128,1,float16,fp8,0,3.053466796875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,128,1,float16,float16,0,1.5378185907999675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,128,1,fp8,fp8,0,2.4402987162272134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,128,1,float16,float16,0,1.6038880348205566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,128,1,fp8,fp8,0,1.3138186931610107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,128,1,fp8,fp8,0,2.499920050303141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,128,1,float16,fp8,0,3.0786399841308594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,128,1,float16,float16,0,3.108112017313639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,128,1,float16,fp8,0,1.506922721862793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,128,1,float16,fp8,0,1.567402680714925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,128,1,fp8,fp8,0,1.2200000286102295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,128,1,float16,float16,0,1.5468427340189617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,128,1,float16,fp8,0,1.5166133244832356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,128,1,fp8,fp8,0,1.2283946673075359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,128,1,float16,float16,0,1.5471253395080566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,128,1,float16,fp8,0,1.5187733968098958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,128,1,fp8,fp8,0,1.2452320257822673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,128,1,float16,float16,0,1.5682613054911296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,128,1,float16,fp8,0,1.542143980662028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,128,1,float16,float16,0,0.7975306510925293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,128,1,fp8,fp8,0,1.2775146961212158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,128,1,float16,fp8,0,0.7830133438110352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,128,1,fp8,fp8,0,0.6369653145472208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,128,1,float16,float16,0,0.798367977142334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,128,1,float16,fp8,0,0.812053362528483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,128,1,fp8,fp8,0,0.6863306363423666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,128,1,float16,float16,0,0.8262773354848226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,128,1,float16,fp8,0,0.7846826712290446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,128,1,fp8,fp8,0,0.643392006556193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,128,1,float16,fp8,0,0.7913066546122233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,128,1,float16,float16,0,0.8051839669545492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,128,1,float16,float16,0,0.8136959870656332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,128,1,fp8,fp8,0,0.6465813318888346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,128,1,float16,float16,0,0.4400906562805176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,128,1,float16,fp8,0,0.8032053311665853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,128,1,float16,fp8,0,0.4406133492787679
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,128,1,fp8,fp8,0,0.6626026630401611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,128,1,fp8,fp8,0,0.36213334401448566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,128,1,float16,fp8,0,0.4238293170928955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,128,1,fp8,fp8,0,0.32952000697453815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,128,1,float16,float16,0,0.4326719840367635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,128,1,fp8,fp8,0,0.3331306576728821
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,128,1,float16,fp8,0,0.42497066656748456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,128,1,float16,float16,0,0.4310613473256429
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,128,1,float16,float16,0,0.4354666471481323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,128,1,float16,fp8,0,0.427781343460083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,128,1,fp8,fp8,0,0.33716265360514325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,128,1,float16,float16,0,0.43881066640218097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,128,1,float16,fp8,0,0.43511466185251874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,128,1,float16,float16,0,0.2315839926401774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,128,1,fp8,fp8,0,0.3485013246536255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,128,1,fp8,fp8,0,0.19873066743214926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,128,1,float16,fp8,0,0.22961066166559854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,128,1,float16,float16,0,0.22048532962799072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,128,1,float16,fp8,0,0.2163146734237671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,128,1,fp8,fp8,0,0.18345600366592407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,128,1,float16,float16,0,0.22166933616002402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,128,1,float16,fp8,0,0.21622933944066366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,128,1,fp8,fp8,0,0.18678933382034302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,128,1,float16,float16,0,0.22290666898091635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,128,1,fp8,fp8,0,0.18818666537602743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,128,1,float16,fp8,0,0.21937066316604614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,128,1,float16,float16,0,0.22741333643595377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,128,1,float16,fp8,0,0.22234133879343668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,128,1,float16,float16,0,0.136053333679835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,128,1,fp8,fp8,0,0.19278399149576822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,128,1,float16,fp8,0,0.13573333621025085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,128,1,fp8,fp8,0,0.11876266201337178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,128,1,float16,float16,0,0.1308746635913849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,128,1,fp8,fp8,0,0.10955733060836792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,128,1,float16,fp8,0,0.12918933232625326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,128,1,float16,float16,0,0.13100799918174744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,128,1,float16,fp8,0,0.12982933719952902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,128,1,fp8,fp8,0,0.1104213297367096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,128,1,float16,float16,0,0.13223466277122498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,128,1,float16,fp8,0,0.13010133306185404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,128,1,fp8,fp8,0,0.11236266295115153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,128,1,float16,float16,0,0.13391466935475668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,128,1,fp8,fp8,0,0.1160586675008138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,128,1,float16,fp8,0,0.13272000352541605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,128,1,float16,float16,0,3.1960531870524087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,128,1,fp8,fp8,0,2.654655933380127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,128,1,float16,fp8,0,3.071023941040039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,128,1,float16,float16,0,3.2243467966715493
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,128,1,fp8,fp8,0,2.6614774068196616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,128,1,float16,fp8,0,3.0847094853719077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,128,1,float16,float16,0,3.2478612263997397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,128,1,float16,fp8,0,3.1466293334960938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,128,1,float16,float16,0,1.596757411956787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,128,1,fp8,fp8,0,2.6947946548461914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,128,1,float16,float16,0,1.6839572588602703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,128,1,float16,fp8,0,1.643642743428548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,128,1,float16,fp8,0,1.5518560409545898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,128,1,float16,float16,0,3.361029307047526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,128,1,fp8,fp8,0,1.333738644917806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,128,1,float16,fp8,0,3.219034512837728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,128,1,fp8,fp8,0,1.4551466306050618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,128,1,fp8,fp8,0,2.7744054794311523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,128,1,float16,float16,0,1.6046346028645833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,128,1,float16,fp8,0,1.5572853088378906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,128,1,fp8,fp8,0,1.3443412780761719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,128,1,float16,fp8,0,1.5732693672180176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,128,1,float16,float16,0,1.626186688741048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,128,1,fp8,fp8,0,1.362005392710368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,128,1,float16,float16,0,1.653114636739095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,128,1,float16,fp8,0,1.6168640454610188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,128,1,float16,float16,0,0.8138293425242106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,128,1,float16,fp8,0,0.794111967086792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,128,1,fp8,fp8,0,1.4033279418945312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,128,1,float16,float16,0,0.8518986701965332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,128,1,fp8,fp8,0,0.6810346444447836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,128,1,float16,fp8,0,0.8463146686553955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,128,1,float16,float16,0,0.8158453305562338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,128,1,fp8,fp8,0,0.7502986590067545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,128,1,float16,fp8,0,0.8000586827596029
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,128,1,fp8,fp8,0,0.6880906422932943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,128,1,float16,float16,0,0.8216853141784668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,128,1,float16,fp8,0,0.8056159814198812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,128,1,fp8,fp8,0,0.6957866350809733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,128,1,float16,float16,0,0.8389440377553304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,128,1,float16,float16,0,0.45154666900634766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,128,1,float16,fp8,0,0.8245706558227539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,128,1,float16,float16,0,0.43060266971588135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,128,1,fp8,fp8,0,0.7181119918823242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,128,1,float16,fp8,0,0.42025065422058105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,128,1,float16,fp8,0,0.4447093407313029
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,128,1,fp8,fp8,0,0.39974931875864667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,128,1,fp8,fp8,0,0.3631253242492676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,128,1,float16,float16,0,0.43458131949106854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,128,1,float16,fp8,0,0.42469334602355957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,128,1,fp8,fp8,0,0.36589332421620685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,128,1,float16,float16,0,0.4366399844487508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,128,1,float16,fp8,0,0.4273546536763509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,128,1,fp8,fp8,0,0.37158934275309247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,128,1,float16,float16,0,0.4443146785100301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,128,1,float16,fp8,0,0.43770134449005127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,128,1,float16,float16,0,0.2379253307978312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,128,1,float16,fp8,0,0.23332800467809042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,128,1,float16,float16,0,0.24978133042653403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,128,1,float16,fp8,0,0.24684800704320273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,128,1,fp8,fp8,0,0.3829333384831746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,128,1,fp8,fp8,0,0.2122933268547058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,128,1,fp8,fp8,0,0.19213332732518515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,128,1,float16,float16,0,0.2406346599260966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,128,1,float16,fp8,0,0.2350133260091146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,128,1,fp8,fp8,0,0.19343467553456625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,128,1,float16,float16,0,0.2429866592089335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,128,1,float16,fp8,0,0.2379306753476461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,128,1,fp8,fp8,0,0.1962826649347941
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,128,1,float16,float16,0,0.2466933329900106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,128,1,float16,fp8,0,0.24356800317764282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,128,1,float16,float16,0,0.13141866525014242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,128,1,fp8,fp8,0,0.20255466302235922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,128,1,float16,fp8,0,0.13059199849764505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,128,1,float16,float16,0,0.12403200070063274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,128,1,float16,fp8,0,0.12104533116022746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,128,1,fp8,fp8,0,0.10752000411351521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,128,1,fp8,fp8,0,0.12136000394821167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,128,1,float16,float16,0,0.12416000167528789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,128,1,float16,fp8,0,0.12153066198031108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,128,1,fp8,fp8,0,0.10974933703740437
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,128,1,float16,fp8,0,0.12410133083661397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,128,1,float16,float16,0,0.1258080005645752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,128,1,fp8,fp8,0,0.11332266529401143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,128,1,float16,float16,0,0.1283680001894633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,128,1,float16,fp8,0,0.12622933586438498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,128,1,fp8,fp8,0,0.11706667145093282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,128,1,float16,float16,0,0.07814933359622955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,128,1,float16,fp8,0,0.07918400069077809
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,128,1,float16,float16,0,0.07551999886830647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,128,1,fp8,fp8,0,0.07451733450094859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,128,1,float16,fp8,0,0.07477866609891255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,128,1,fp8,fp8,0,0.06791999936103821
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,128,1,float16,float16,0,0.07569066683451335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,128,1,float16,fp8,0,0.07487999896208446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,128,1,fp8,fp8,0,0.06781333188215892
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,128,1,float16,float16,0,0.07619733115037282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,128,1,float16,fp8,0,0.07434666653474171
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,128,1,fp8,fp8,0,0.06875733534495036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,128,1,float16,float16,0,0.07634133100509644
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,128,1,float16,fp8,0,0.0758240024248759
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,128,1,fp8,fp8,0,0.0705866664648056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,128,1,float16,float16,0,2.093536059061686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,128,1,float16,fp8,0,1.9727412859598796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,128,1,fp8,fp8,0,1.7988533973693848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,128,1,float16,float16,0,2.1011412938435874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,128,1,fp8,fp8,0,1.8113867441813152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,128,1,float16,fp8,0,1.991312026977539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,128,1,float16,float16,0,2.122320016225179
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,128,1,float16,fp8,0,2.0148159662882485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,128,1,float16,float16,0,1.0437866846720378
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,128,1,fp8,fp8,0,1.8278826077779133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,128,1,float16,float16,0,1.1280319690704346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,128,1,float16,fp8,0,1.077994664510091
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,128,1,float16,fp8,0,1.0035999615987141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,128,1,fp8,fp8,0,1.0013439655303955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,128,1,float16,float16,0,2.1835519472757974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,128,1,fp8,fp8,0,1.8863040606180828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,128,1,float16,fp8,0,2.0572053591410318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,128,1,fp8,fp8,0,0.9037653605143229
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,128,1,float16,float16,0,1.0533920129140217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,128,1,float16,fp8,0,1.009109338124593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,128,1,fp8,fp8,0,0.9103946685791016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,128,1,float16,float16,0,1.0567839940388997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,128,1,float16,fp8,0,1.0198773543039958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,128,1,fp8,fp8,0,0.9265546798706055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,128,1,float16,fp8,0,1.0449333190917969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,128,1,float16,float16,0,1.0859893163045247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,128,1,float16,float16,0,0.5349546670913696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,128,1,float16,float16,0,0.5679626862208048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,128,1,float16,fp8,0,0.5219573179880778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,128,1,float16,fp8,0,0.562064011891683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,128,1,fp8,fp8,0,0.9573173522949219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,128,1,fp8,fp8,0,0.4669333299001058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,128,1,fp8,fp8,0,0.5200426578521729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,128,1,float16,float16,0,0.5396426518758138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,128,1,float16,fp8,0,0.5236213207244873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,128,1,fp8,fp8,0,0.46989333629608154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,128,1,float16,float16,0,0.5429813464482626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,128,1,fp8,fp8,0,0.4800853331883748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,128,1,float16,fp8,0,0.5302240053812662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,128,1,float16,float16,0,0.5554186503092448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,128,1,float16,float16,0,0.30394667387008667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,128,1,float16,float16,0,0.28814399242401123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,128,1,fp8,fp8,0,0.49502400557200116
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,128,1,float16,fp8,0,0.2808159987131755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,128,1,float16,fp8,0,0.5428373416264852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,128,1,float16,fp8,0,0.29981333017349243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,128,1,fp8,fp8,0,0.2765866716702779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,128,1,fp8,fp8,0,0.2432639996210734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,128,1,float16,float16,0,0.28911999861399335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,128,1,float16,fp8,0,0.2840106685956319
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,128,1,fp8,fp8,0,0.24560532967249551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,128,1,float16,float16,0,0.29284266630808514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,128,1,fp8,fp8,0,0.24885867039362589
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,128,1,float16,float16,0,0.2988106608390808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,128,1,float16,fp8,0,0.28567999601364136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,128,1,float16,fp8,0,0.2924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,128,1,float16,float16,0,0.16477333505948386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,128,1,float16,fp8,0,0.16382933656374613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,128,1,fp8,fp8,0,0.25861332813898724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,128,1,float16,float16,0,0.1506186624368032
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,128,1,fp8,fp8,0,0.15018133322397867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,128,1,float16,fp8,0,0.1474133332570394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,128,1,float16,float16,0,0.15083199739456177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,128,1,float16,fp8,0,0.14975466330846152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,128,1,fp8,fp8,0,0.13646933436393738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,128,1,fp8,fp8,0,0.13529066244761148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,128,1,float16,float16,0,0.15318933129310608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,128,1,float16,fp8,0,0.15028267105420431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,128,1,fp8,fp8,0,0.13873066504796347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,128,1,float16,float16,0,0.15758400162061056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,128,1,float16,fp8,0,0.15479999780654907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,128,1,float16,float16,0,0.09357333183288574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,128,1,fp8,fp8,0,0.14330666263898215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,128,1,float16,float16,0,0.0860533316930135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,128,1,fp8,fp8,0,0.09047999978065491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,128,1,float16,fp8,0,0.09261332949002583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,128,1,float16,fp8,0,0.08483200271924336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,128,1,fp8,fp8,0,0.0773119976123174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,128,1,float16,float16,0,0.0860586663087209
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,128,1,fp8,fp8,0,0.07922666768232982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,128,1,float16,fp8,0,0.08504000306129456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,128,1,float16,float16,0,0.08839466174443562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,128,1,float16,fp8,0,0.08545600374539693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,128,1,float16,float16,0,0.08980266253153484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,128,1,float16,fp8,0,0.08903466661771138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,128,1,fp8,fp8,0,0.08641599615414937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,128,1,fp8,fp8,0,0.08130666613578796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,128,1,float16,float16,0,0.058789332707722984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,128,1,float16,float16,0,0.05695466697216034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,128,1,float16,fp8,0,0.05804799993832906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,128,1,fp8,fp8,0,0.055685331424077354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,128,1,float16,fp8,0,0.05624533196290334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,128,1,fp8,fp8,0,0.05207466582457224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,128,1,float16,float16,0,0.05723733206590017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,128,1,fp8,fp8,0,0.05261866748332977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,128,1,float16,float16,0,0.057215998570124306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,128,1,float16,fp8,0,0.05656533439954122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,128,1,fp8,fp8,0,0.0524586687485377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,128,1,float16,fp8,0,0.05669333537419637
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,128,1,float16,float16,0,0.057616000374158226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,128,1,float16,fp8,0,0.05683733522891998
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,128,1,float16,float16,0,1.9403893152872722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,128,1,float16,fp8,0,1.9218559265136719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,128,1,fp8,fp8,0,1.768954594930013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,128,1,float16,float16,0,1.9587945938110352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,128,1,fp8,fp8,0,1.8392693201700847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,128,1,float16,float16,0,2.0793333053588867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,128,1,float16,fp8,0,1.9773866335550945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,128,1,float16,fp8,0,2.099818706512451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,128,1,float16,float16,0,0.981882651646932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,128,1,fp8,fp8,0,1.9978346824645996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,128,1,float16,float16,0,1.1447253227233887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,128,1,float16,fp8,0,0.9838879903157552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,128,1,float16,fp8,0,1.1240800221761067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,128,1,float16,fp8,0,2.1536266009012857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,128,1,float16,float16,0,2.1461599667867026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,128,1,fp8,fp8,0,1.9932479858398438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,128,1,fp8,fp8,0,0.8859306971232096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,128,1,fp8,fp8,0,1.026096026102702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,128,1,float16,fp8,0,0.9934079647064209
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,128,1,float16,float16,0,0.9931466579437256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,128,1,fp8,fp8,0,0.9274933338165283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,128,1,float16,fp8,0,1.051968018213908
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,128,1,float16,float16,0,1.0456586678822835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,128,1,fp8,fp8,0,1.0087626775105794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,128,1,float16,float16,0,1.0705066521962483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,128,1,float16,float16,0,0.5062346855799357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,128,1,float16,fp8,0,0.5047839879989624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,128,1,float16,float16,0,0.5767093499501547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,128,1,float16,fp8,0,0.5683840115865072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,128,1,fp8,fp8,0,1.0102453231811523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,128,1,float16,fp8,0,1.080133358637492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,128,1,fp8,fp8,0,0.4514133135477702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,128,1,fp8,fp8,0,0.5223573446273804
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,128,1,float16,float16,0,0.5112266540527344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,128,1,fp8,fp8,0,0.4750026861826579
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,128,1,float16,fp8,0,0.5091946522394816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,128,1,float16,float16,0,0.5292426745096842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,128,1,float16,fp8,0,0.5242506663004557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,128,1,fp8,fp8,0,0.5156213442484537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,128,1,float16,float16,0,0.5353440046310425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,128,1,fp8,fp8,0,0.5198239882787069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,128,1,float16,float16,0,0.2690666715304057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,128,1,float16,float16,0,0.30165332555770874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,128,1,float16,fp8,0,0.29711467027664185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,128,1,float16,fp8,0,0.5469333330790201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,128,1,float16,fp8,0,0.2669600049654643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,128,1,fp8,fp8,0,0.2724799911181132
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,128,1,fp8,fp8,0,0.23822933435440063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,128,1,float16,fp8,0,0.27034666140874225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,128,1,fp8,fp8,0,0.24839999278386435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,128,1,float16,float16,0,0.27029865980148315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,128,1,float16,float16,0,0.27746133009592694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,128,1,float16,fp8,0,0.27619733413060504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,128,1,fp8,fp8,0,0.2691253423690796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,128,1,float16,float16,0,0.14726932843526205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,128,1,float16,float16,0,0.28200532992680866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,128,1,fp8,fp8,0,0.2699306607246399
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,128,1,float16,fp8,0,0.2842079997062683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,128,1,float16,float16,0,0.16486933827400208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,128,1,fp8,fp8,0,0.14276267091433206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,128,1,float16,fp8,0,0.1620266636212667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,128,1,fp8,fp8,0,0.12499733765920003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,128,1,float16,fp8,0,0.14777066310246786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,128,1,float16,float16,0,0.15001066525777182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,128,1,float16,fp8,0,0.1492586632569631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,128,1,fp8,fp8,0,0.12843733032544455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,128,1,float16,float16,0,0.15461867054303488
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,128,1,float16,fp8,0,0.1532746652762095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,128,1,fp8,fp8,0,0.13798933227856955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,128,1,float16,float16,0,0.1569973329703013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,128,1,float16,float16,0,0.08794132868448894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,128,1,fp8,fp8,0,0.13900799552599588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,128,1,float16,fp8,0,0.08692266543706258
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,128,1,fp8,fp8,0,0.07913599908351898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,128,1,float16,fp8,0,0.1549013356367747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,128,1,float16,float16,0,0.07732800145943959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,128,1,float16,fp8,0,0.0772159993648529
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,128,1,fp8,fp8,0,0.06821333368619283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,128,1,float16,float16,0,0.07828799883524577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,128,1,float16,fp8,0,0.0788320004940033
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,128,1,fp8,fp8,0,0.07017066578070323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,128,1,float16,float16,0,0.08122133215268452
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,128,1,float16,fp8,0,0.08098133405049641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,128,1,float16,fp8,0,0.08292800188064575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,128,1,fp8,fp8,0,0.07674666742483775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,128,1,fp8,fp8,0,0.07678399980068207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,128,1,float16,fp8,0,0.05012799799442291
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,128,1,float16,float16,0,0.08304533362388611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,128,1,float16,float16,0,0.05129600067933401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,128,1,fp8,fp8,0,0.04796266555786133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,128,1,float16,fp8,0,0.045514668027559914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,128,1,fp8,fp8,0,0.04163199911514918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,128,1,float16,float16,0,0.04580800235271454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,128,1,float16,float16,0,0.045754666129748024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,128,1,float16,fp8,0,0.04610133171081543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,128,1,fp8,fp8,0,0.04271466533342997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,128,1,float16,float16,0,0.046575998266537987
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,128,1,float16,fp8,0,0.046997333566347756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,128,1,fp8,fp8,0,0.04448533554871877
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,128,1,float16,float16,0,0.04716266691684723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,128,1,float16,fp8,0,0.047024001677831016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,128,1,float16,float16,0,0.03736000011364619
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,128,1,float16,fp8,0,0.03696000079313914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,128,1,fp8,fp8,0,0.045221333702405296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,128,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,128,1,float16,float16,0,0.03477333237727483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,128,1,float16,fp8,0,0.03490666548411051
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,128,1,fp8,fp8,0,0.03123733401298523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,128,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,128,1,fp8,fp8,0,0.031167998909950256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,128,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,128,1,float16,fp8,0,0.03554133325815201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,128,1,fp8,fp8,0,0.03233066697915395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,128,1,float16,float16,0,0.035642666121323906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,128,1,float16,float16,0,0.035461333890755974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,128,1,float16,fp8,0,0.036277333895365395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,128,1,fp8,fp8,0,0.032501332461833954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,1,128,1,float16,float16,0,1.5093013445536296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,1,128,1,float16,fp8,0,1.4964853922526042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,1,128,1,fp8,fp8,0,1.4633493423461914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,2,128,1,float16,float16,0,1.5291520754496257
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,2,128,1,fp8,fp8,0,1.5470879872639973
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,2,128,1,float16,fp8,0,1.5327787399291992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,4,128,1,float16,fp8,0,1.6629172960917156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,4,128,1,float16,float16,0,1.6352906227111816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,1,128,1,float16,float16,0,0.7620053291320801
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,4,128,1,fp8,fp8,0,1.6972692807515461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,16,128,1,float16,fp8,0,0.9093600114186605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,16,128,1,fp8,fp8,0,0.8798240025838217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,16,128,1,float16,float16,0,0.9290933609008789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,1,128,1,float16,fp8,0,0.7576693693796793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,8,128,1,float16,float16,0,1.7138187090555828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,1,128,1,fp8,fp8,0,0.7314293384552002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,8,128,1,float16,fp8,0,1.722266674041748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,16,8,128,1,fp8,fp8,0,1.6960479418436687
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,2,128,1,float16,float16,0,0.7702613671620687
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,2,128,1,float16,fp8,0,0.7678399880727133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,2,128,1,fp8,fp8,0,0.7773866653442383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,4,128,1,float16,fp8,0,0.8299946784973145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,4,128,1,float16,float16,0,0.8200799624125162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,4,128,1,fp8,fp8,0,0.8554826577504476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,8,128,1,float16,float16,0,0.8512693246205648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,1,128,1,float16,float16,0,0.39284801483154297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,8,128,1,float16,fp8,0,0.8632000287373861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,16,128,1,float16,float16,0,0.4679306745529175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,1,128,1,float16,fp8,0,0.3928639888763428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,16,8,128,1,fp8,fp8,0,0.8566400210062662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,1,128,1,fp8,fp8,0,0.3730400005976359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,16,128,1,fp8,fp8,0,0.4456533193588257
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,2,128,1,float16,float16,0,0.3975359996159871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,16,128,1,float16,fp8,0,0.4596906503041585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,2,128,1,float16,fp8,0,0.3970400094985962
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,2,128,1,fp8,fp8,0,0.39671464761098224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,4,128,1,float16,float16,0,0.4176853497823079
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,4,128,1,float16,fp8,0,0.4158986806869507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,4,128,1,fp8,fp8,0,0.43702399730682373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,8,128,1,float16,float16,0,0.4236373504002889
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,16,128,1,float16,fp8,0,0.2405386765797933
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,16,128,1,float16,float16,0,0.24625066916147867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,1,128,1,float16,float16,0,0.20849599440892538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,8,128,1,float16,fp8,0,0.4338826735814412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,1,128,1,float16,fp8,0,0.20990933974583945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,16,8,128,1,fp8,fp8,0,0.44098134835561115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,16,128,1,fp8,fp8,0,0.23372799158096313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,1,128,1,fp8,fp8,0,0.1960053245226542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,2,128,1,float16,float16,0,0.21081066131591797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,2,128,1,float16,fp8,0,0.21079466740290323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,2,128,1,fp8,fp8,0,0.20510933796564737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,4,128,1,float16,fp8,0,0.21832533677419028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,4,128,1,float16,float16,0,0.2181439995765686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,4,128,1,fp8,fp8,0,0.22764799992243448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,8,128,1,float16,fp8,0,0.22474133968353271
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,1,128,1,float16,float16,0,0.11547199885050456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,8,128,1,float16,float16,0,0.22519999742507935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,16,128,1,float16,float16,0,0.13496533036231995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,16,8,128,1,fp8,fp8,0,0.22892266511917114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,16,128,1,float16,fp8,0,0.13183466593424478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,1,128,1,float16,fp8,0,0.11613866686820984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,16,128,1,fp8,fp8,0,0.12405866384506226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,1,128,1,fp8,fp8,0,0.10859733819961548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,2,128,1,float16,float16,0,0.11756267150243123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,2,128,1,fp8,fp8,0,0.10965333382288615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,2,128,1,float16,fp8,0,0.11736533045768738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,4,128,1,float16,float16,0,0.12306132912635803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,4,128,1,float16,fp8,0,0.12132267157236735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,4,128,1,fp8,fp8,0,0.12034133076667786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,8,128,1,float16,float16,0,0.12541866302490234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,1,128,1,float16,float16,0,0.06419200201829274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,8,128,1,float16,fp8,0,0.12325867017110188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,16,128,1,fp8,fp8,0,0.07018133501211803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,16,128,1,float16,float16,0,0.07541333138942719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,16,8,128,1,fp8,fp8,0,0.12106666962305705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,1,128,1,float16,fp8,0,0.0636106679836909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,1,128,1,fp8,fp8,0,0.059258664647738137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,16,128,1,float16,fp8,0,0.07374399900436401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,2,128,1,float16,float16,0,0.06440000236034393
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,2,128,1,fp8,fp8,0,0.06204266846179962
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,2,128,1,float16,fp8,0,0.06557866434256236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,4,128,1,float16,float16,0,0.06763733426729839
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,4,128,1,float16,fp8,0,0.06734933455785115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,8,128,1,float16,float16,0,0.06899733344713847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,4,128,1,fp8,fp8,0,0.06746666630109151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,8,128,1,fp8,fp8,0,0.06825066606203715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,16,128,1,float16,float16,0,0.04372266431649526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,16,128,1,float16,fp8,0,0.044293334086736046
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,1,128,1,float16,float16,0,0.038805333276589714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,16,8,128,1,float16,fp8,0,0.06924800078074138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,16,128,1,fp8,fp8,0,0.04287466903527578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,1,128,1,float16,fp8,0,0.03897066662708918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,2,128,1,float16,float16,0,0.039162665605545044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,1,128,1,fp8,fp8,0,0.036346666514873505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,2,128,1,float16,fp8,0,0.039018665750821434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,4,128,1,float16,float16,0,0.0401653324564298
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,2,128,1,fp8,fp8,0,0.03734400123357773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,4,128,1,fp8,fp8,0,0.038975998759269714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,8,128,1,float16,float16,0,0.040394666294256844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,8,128,1,float16,fp8,0,0.04098666707674662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,4,128,1,float16,fp8,0,0.04085866610209147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,16,128,1,float16,float16,0,0.029701332251230877
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,16,8,128,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,16,128,1,float16,fp8,0,0.03053866575161616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,16,128,1,fp8,fp8,0,0.028346667687098186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,1,128,1,float16,float16,0,0.027744000156720478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,1,128,1,float16,fp8,0,0.028170667588710785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,1,128,1,fp8,fp8,0,0.026426665484905243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,2,128,1,float16,fp8,0,0.028362666567166645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,4,128,1,float16,float16,0,0.02888533224662145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,4,128,1,float16,fp8,0,0.028970666229724884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,4,128,1,fp8,fp8,0,0.027855999767780304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,2,128,1,float16,float16,0,0.028351999819278717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,2,128,1,fp8,fp8,0,0.027029333015282948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,8,128,1,float16,fp8,0,0.02914133419593175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,8,128,1,float16,float16,0,0.02922133356332779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,16,8,128,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,16,128,1,float16,float16,0,0.025994665920734406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,16,128,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,16,128,1,fp8,fp8,0,0.02456533412138621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,1,128,1,fp8,fp8,0,0.023743999501069386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,1,128,1,float16,fp8,0,0.025578667720158894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,2,128,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,2,128,1,float16,float16,0,0.025077333052953083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,1,128,1,float16,float16,0,0.025290665527184803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,2,128,1,fp8,fp8,0,0.024186665813128155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,4,128,1,float16,float16,0,0.025642665723959606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,4,128,1,fp8,fp8,0,0.02456533412138621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,4,128,1,float16,fp8,0,0.02589333305756251
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,8,128,1,float16,fp8,0,0.025888000925381977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,8,128,1,float16,float16,0,0.025797332326571148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,16,8,128,1,fp8,fp8,0,0.02426133304834366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,1,128,1,float16,fp8,0,0.6496640046437582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,1,128,1,fp8,fp8,0,0.6360960006713867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,1,128,1,float16,float16,0,0.6571893294652303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,2,128,1,float16,float16,0,0.6641173362731934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,2,128,1,float16,fp8,0,0.6577333211898804
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,2,128,1,fp8,fp8,0,0.6876693566640218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,4,128,1,float16,fp8,0,0.7210133075714111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,4,128,1,float16,float16,0,0.718074639638265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,1,128,1,float16,float16,0,0.3385546604792277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,4,128,1,fp8,fp8,0,0.7637013594309489
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,16,128,1,float16,float16,0,0.41998398303985596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,16,128,1,float16,fp8,0,0.4082133372624715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,16,128,1,fp8,fp8,0,0.401589314142863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,8,128,1,float16,float16,0,0.7322933673858643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,1,128,1,float16,fp8,0,0.33624001344045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,8,128,1,fp8,fp8,0,0.7668320337931315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,16,8,128,1,float16,fp8,0,0.7510293324788412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,1,128,1,fp8,fp8,0,0.32551999886830646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,2,128,1,float16,float16,0,0.3436533212661743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,2,128,1,float16,fp8,0,0.33909332752227783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,2,128,1,fp8,fp8,0,0.3516319990158081
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,4,128,1,float16,float16,0,0.362282673517863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,4,128,1,float16,fp8,0,0.3555413484573364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,4,128,1,fp8,fp8,0,0.39216001828511554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,1,128,1,float16,float16,0,0.17988799015680948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,1,128,1,float16,fp8,0,0.1790293256441752
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,8,128,1,float16,fp8,0,0.370959997177124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,16,128,1,float16,float16,0,0.21740800142288208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,8,128,1,float16,float16,0,0.36644800504048664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,16,128,1,float16,fp8,0,0.21395200490951538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,16,8,128,1,fp8,fp8,0,0.39717332522074383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,16,128,1,fp8,fp8,0,0.21194666624069214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,2,128,1,float16,float16,0,0.18168532848358154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,1,128,1,fp8,fp8,0,0.17490132649739584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,2,128,1,float16,fp8,0,0.1811786691347758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,2,128,1,fp8,fp8,0,0.18395199378331503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,4,128,1,float16,fp8,0,0.18821867307027182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,4,128,1,fp8,fp8,0,0.20561067263285318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,4,128,1,float16,float16,0,0.19076800346374512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,8,128,1,float16,float16,0,0.19426133235295615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,16,128,1,float16,float16,0,0.12177067001660664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,16,128,1,float16,fp8,0,0.11739733815193176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,8,128,1,float16,fp8,0,0.19384533166885376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,1,128,1,float16,float16,0,0.10021332899729411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,16,128,1,fp8,fp8,0,0.11505599816640218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,1,128,1,fp8,fp8,0,0.09842666983604431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,1,128,1,float16,fp8,0,0.10015466809272766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,16,8,128,1,fp8,fp8,0,0.20571200052897134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,2,128,1,fp8,fp8,0,0.1006719966729482
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,4,128,1,float16,fp8,0,0.10657599568367004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,4,128,1,float16,float16,0,0.10658666491508484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,2,128,1,float16,fp8,0,0.10108799735705058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,4,128,1,fp8,fp8,0,0.11145599683125813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,2,128,1,float16,float16,0,0.1016480028629303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,8,128,1,float16,fp8,0,0.10821333527565002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,8,128,1,float16,float16,0,0.11076266566912334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,1,128,1,float16,float16,0,0.056847999493281044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,16,8,128,1,fp8,fp8,0,0.11230933666229248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,16,128,1,float16,float16,0,0.07086400190989177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,16,128,1,fp8,fp8,0,0.06628266473611195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,16,128,1,float16,fp8,0,0.07042666773001353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,1,128,1,float16,fp8,0,0.05741333464781443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,1,128,1,fp8,fp8,0,0.054341331124305725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,2,128,1,float16,float16,0,0.05855466425418854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,2,128,1,float16,fp8,0,0.058042665322621666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,2,128,1,fp8,fp8,0,0.056794668237368263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,4,128,1,float16,float16,0,0.060602664947509766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,4,128,1,float16,fp8,0,0.06090133388837179
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,4,128,1,fp8,fp8,0,0.061994666854540505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,8,128,1,float16,float16,0,0.06273599962393443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,16,128,1,float16,float16,0,0.04197866717974345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,1,128,1,float16,float16,0,0.03573333223660787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,8,128,1,float16,fp8,0,0.06252799928188324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,16,128,1,float16,fp8,0,0.040549332896868386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,16,8,128,1,fp8,fp8,0,0.0627040018637975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,1,128,1,float16,fp8,0,0.03644266724586487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,16,128,1,fp8,fp8,0,0.03923733284076055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,1,128,1,fp8,fp8,0,0.03342933456103007
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,2,128,1,float16,float16,0,0.036517334481080375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,2,128,1,float16,fp8,0,0.036320000886917114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,4,128,1,float16,float16,0,0.03690666705369949
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,8,128,1,float16,float16,0,0.037248000502586365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,4,128,1,float16,fp8,0,0.03718933214743932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,2,128,1,fp8,fp8,0,0.03426666557788849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,8,128,1,float16,fp8,0,0.03762666632731756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,4,128,1,fp8,fp8,0,0.03642666588226954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,16,8,128,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,16,128,1,float16,float16,0,0.026906666656335194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,16,128,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,16,128,1,fp8,fp8,0,0.025125332176685333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,1,128,1,float16,float16,0,0.025349333882331848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,1,128,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,2,128,1,float16,fp8,0,0.02606400102376938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,2,128,1,float16,float16,0,0.025663999219735462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,2,128,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,4,128,1,float16,fp8,0,0.026144000391165417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,1,128,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,4,128,1,float16,float16,0,0.026186667382717133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,4,128,1,fp8,fp8,0,0.024901332954565685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,8,128,1,float16,float16,0,0.02809600035349528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,8,128,1,float16,fp8,0,0.026821332673231762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,16,8,128,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,16,128,1,float16,float16,0,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,16,128,1,float16,fp8,0,0.022522665560245514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,1,128,1,float16,float16,0,0.021877333521842957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,16,128,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,1,128,1,float16,fp8,0,0.021989333132902782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,2,128,1,float16,float16,0,0.021850667893886566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,2,128,1,float16,fp8,0,0.021882665654023487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,4,128,1,float16,float16,0,0.0223786657055219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,1,128,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,2,128,1,fp8,fp8,0,0.02067199970285098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,4,128,1,float16,fp8,0,0.022511998812357586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,8,128,1,float16,fp8,0,0.022255999346574146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,4,128,1,fp8,fp8,0,0.021498667697111767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,8,128,1,float16,float16,0,0.022090665996074677
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,16,128,1,float16,float16,0,0.020975999534130096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,16,8,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,16,128,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,1,128,1,float16,float16,0,0.02075200031201045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,1,128,1,float16,fp8,0,0.02086399992307027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,16,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,1,128,1,fp8,fp8,0,0.019648000597953796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,2,128,1,float16,float16,0,0.020746666938066483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,2,128,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,2,128,1,fp8,fp8,0,0.01960533360640208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,4,128,1,float16,fp8,0,0.021226666867733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,4,128,1,float16,float16,0,0.020746666938066483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,4,128,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,8,128,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,8,128,1,fp8,fp8,0,0.020021333048741024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,16,8,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,1,128,1,float16,float16,0,0.3139946659406026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,1,128,1,float16,fp8,0,0.3106186588605245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,1,128,1,fp8,fp8,0,0.3269813259442647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,2,128,1,float16,float16,0,0.31863999366760254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,2,128,1,float16,fp8,0,0.31798932949701947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,2,128,1,fp8,fp8,0,0.35304534435272217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,4,128,1,float16,float16,0,0.33905601501464844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,4,128,1,float16,fp8,0,0.3333866596221924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,4,128,1,fp8,fp8,0,0.3911093473434448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,16,128,1,float16,float16,0,0.21726399660110474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,8,128,1,float16,float16,0,0.3408799966176351
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,8,128,1,float16,fp8,0,0.33852799733479816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,16,8,128,1,fp8,fp8,0,0.39427733421325684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,1,128,1,float16,float16,0,0.1672053337097168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,16,128,1,float16,fp8,0,0.20716800292332968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,16,128,1,fp8,fp8,0,0.2100213368733724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,1,128,1,float16,fp8,0,0.16731733083724976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,2,128,1,float16,fp8,0,0.16953599452972412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,1,128,1,fp8,fp8,0,0.17507733901341757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,2,128,1,float16,float16,0,0.1716266671816508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,2,128,1,fp8,fp8,0,0.18363199631373087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,4,128,1,float16,float16,0,0.18012267351150513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,4,128,1,float16,fp8,0,0.17706666390101114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,4,128,1,fp8,fp8,0,0.20256000757217407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,8,128,1,float16,float16,0,0.18366400400797525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,16,128,1,float16,float16,0,0.12306132912635803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,8,128,1,float16,fp8,0,0.1800959904988607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,16,128,1,float16,fp8,0,0.11707199613253276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,16,8,128,1,fp8,fp8,0,0.20863467454910278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,1,128,1,float16,float16,0,0.09513066212336223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,1,128,1,float16,fp8,0,0.09505599737167358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,16,128,1,fp8,fp8,0,0.1145919958750407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,1,128,1,fp8,fp8,0,0.09776533643404643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,2,128,1,float16,float16,0,0.09780800342559814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,2,128,1,float16,fp8,0,0.09753066301345825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,2,128,1,fp8,fp8,0,0.09869333108266194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,4,128,1,float16,float16,0,0.10342400272687276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,4,128,1,fp8,fp8,0,0.11010133226712544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,4,128,1,float16,fp8,0,0.10123200217882793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,8,128,1,float16,fp8,0,0.10371733705202739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,8,128,1,float16,float16,0,0.10538132985432942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,16,128,1,float16,float16,0,0.06765866776307423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,16,128,1,float16,fp8,0,0.06544533371925354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,16,8,128,1,fp8,fp8,0,0.11141332983970642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,16,128,1,fp8,fp8,0,0.06528000036875407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,1,128,1,float16,fp8,0,0.05483733117580414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,1,128,1,float16,float16,0,0.054933334390322365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,1,128,1,fp8,fp8,0,0.05440000196297964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,2,128,1,float16,float16,0,0.05562133093674978
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,2,128,1,float16,fp8,0,0.05522133409976959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,2,128,1,fp8,fp8,0,0.05585599939028422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,4,128,1,fp8,fp8,0,0.06257600088914235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,4,128,1,float16,float16,0,0.05793599784374237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,8,128,1,float16,fp8,0,0.060047999024391174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,4,128,1,float16,fp8,0,0.059157331784566246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,8,128,1,float16,float16,0,0.059477334221204124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,16,8,128,1,fp8,fp8,0,0.06301333506902058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,16,128,1,float16,float16,0,0.03781333317359289
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,16,128,1,float16,fp8,0,0.03799466788768768
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,1,128,1,float16,float16,0,0.034272000193595886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,16,128,1,fp8,fp8,0,0.03948266555865606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,1,128,1,float16,fp8,0,0.03419733295838038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,1,128,1,fp8,fp8,0,0.03349333256483078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,2,128,1,fp8,fp8,0,0.034341332813103996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,2,128,1,float16,float16,0,0.03479466587305069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,4,128,1,float16,float16,0,0.03572800010442734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,2,128,1,float16,fp8,0,0.03509866694609324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,4,128,1,float16,fp8,0,0.036015999813874565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,4,128,1,fp8,fp8,0,0.03614933292071024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,8,128,1,float16,float16,0,0.035962666074434914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,8,128,1,float16,fp8,0,0.036176001032193504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,16,8,128,1,fp8,fp8,0,0.03668266783157984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,16,128,1,float16,fp8,0,0.0262719988822937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,16,128,1,float16,float16,0,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,1,128,1,float16,float16,0,0.02439466615517934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,1,128,1,float16,fp8,0,0.02463999887307485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,1,128,1,fp8,fp8,0,0.022895999252796173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,2,128,1,float16,float16,0,0.02460266649723053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,2,128,1,float16,fp8,0,0.024735999604066212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,16,128,1,fp8,fp8,0,0.025045332809289295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,2,128,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,4,128,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,4,128,1,fp8,fp8,0,0.024858665963013966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,8,128,1,float16,float16,0,0.024858665963013966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,8,128,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,4,128,1,float16,float16,0,0.025237334271272022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,16,8,128,1,fp8,fp8,0,0.02454400062561035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,16,128,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,16,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,16,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,1,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,1,128,1,float16,fp8,0,0.02046400060256322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,1,128,1,fp8,fp8,0,0.02092266579469045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,2,128,1,fp8,fp8,0,0.02032533288002014
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,2,128,1,float16,fp8,0,0.020645332833131153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,2,128,1,float16,float16,0,0.02049066623051961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,4,128,1,float16,float16,0,0.020741333564122517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,4,128,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,8,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,8,128,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,8,128,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,16,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,16,4,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,16,128,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,16,128,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,1,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,1,128,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,2,128,1,float16,float16,0,0.019061333189407986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,1,128,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,2,128,1,float16,fp8,0,0.019610666980346043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,4,128,1,float16,fp8,0,0.019727999965349834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,4,128,1,float16,float16,0,0.023183998962243397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,2,128,1,fp8,fp8,0,0.020026666422684986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,8,128,1,float16,fp8,0,0.019845332950353622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,8,128,1,float16,float16,0,0.01947733387351036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,4,128,1,fp8,fp8,0,0.019776000330845516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,16,128,1,float16,float16,0,0.01828266680240631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,16,8,128,1,fp8,fp8,0,0.01964266722400983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,16,128,1,float16,fp8,0,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,1,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,1,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,16,128,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,2,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,2,128,1,float16,float16,0,0.018874666343132656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,2,128,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,1,128,1,float16,float16,0,0.018826667219400406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,4,128,1,float16,float16,0,0.018538666268189747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,8,128,1,float16,fp8,0,0.019621333728233974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,4,128,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,8,128,1,float16,float16,0,0.018629333625237148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,4,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,16,8,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,1,128,1,float16,float16,0,0.20414932568868002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,1,128,1,fp8,fp8,0,0.22492265701293945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,1,128,1,float16,fp8,0,0.20371200640996298
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,2,128,1,float16,float16,0,0.20841066042582193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,2,128,1,float16,fp8,0,0.20702399810155234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,2,128,1,fp8,fp8,0,0.23754666248957315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,4,128,1,float16,fp8,0,0.2134880026181539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,4,128,1,float16,float16,0,0.21753066778182983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,4,128,1,fp8,fp8,0,0.25810666879018146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,16,128,1,float16,float16,0,0.12541866302490234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,1,128,1,float16,float16,0,0.11158399780591328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,8,128,1,float16,float16,0,0.22124799092610678
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,8,128,1,fp8,fp8,0,0.26154667139053345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,1,128,1,float16,fp8,0,0.11179199814796448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,16,8,128,1,float16,fp8,0,0.21772799889246622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,16,128,1,float16,fp8,0,0.12256000439325969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,16,128,1,fp8,fp8,0,0.14106667041778564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,1,128,1,fp8,fp8,0,0.1237440009911855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,2,128,1,float16,float16,0,0.11436800161997478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,2,128,1,fp8,fp8,0,0.12756799658139548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,2,128,1,float16,fp8,0,0.11287466684977214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,4,128,1,float16,float16,0,0.11983999609947205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,4,128,1,fp8,fp8,0,0.13597333431243896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,4,128,1,float16,fp8,0,0.11873599886894226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,16,128,1,float16,float16,0,0.06997333467006683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,8,128,1,float16,fp8,0,0.121888001759847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,8,128,1,fp8,fp8,0,0.13716266552607217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,16,128,1,float16,fp8,0,0.06863466898600261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,16,8,128,1,float16,float16,0,0.12404266993204753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,16,128,1,fp8,fp8,0,0.07938666641712189
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,1,128,1,float16,float16,0,0.06344533463319142
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,1,128,1,fp8,fp8,0,0.06890666484832764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,1,128,1,float16,fp8,0,0.06384000182151794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,2,128,1,float16,float16,0,0.06418133278687795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,2,128,1,fp8,fp8,0,0.07041066884994507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,2,128,1,float16,fp8,0,0.064410666624705
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,4,128,1,float16,fp8,0,0.06758933266003926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,4,128,1,float16,float16,0,0.06709333260854085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,8,128,1,float16,float16,0,0.06809066732724507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,4,128,1,fp8,fp8,0,0.07613866527875264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,8,128,1,fp8,fp8,0,0.07666133344173431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,16,128,1,float16,float16,0,0.04083200047413508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,16,128,1,float16,fp8,0,0.03988266736268997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,16,128,1,fp8,fp8,0,0.04674133161703745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,16,8,128,1,float16,fp8,0,0.06807466844717662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,1,128,1,float16,fp8,0,0.03832533210515976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,1,128,1,float16,float16,0,0.03847466657559077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,2,128,1,float16,float16,0,0.03878399978081385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,2,128,1,fp8,fp8,0,0.041450666884581246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,2,128,1,float16,fp8,0,0.03895466774702072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,1,128,1,fp8,fp8,0,0.04144000013669332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,4,128,1,float16,float16,0,0.040106666584809623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,4,128,1,fp8,fp8,0,0.043706665436426796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,4,128,1,float16,fp8,0,0.04037333279848099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,8,128,1,float16,fp8,0,0.0401653324564298
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,8,128,1,fp8,fp8,0,0.0443200021982193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,16,128,1,float16,float16,0,0.026906666656335194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,16,8,128,1,float16,float16,0,0.04011733333269755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,16,128,1,fp8,fp8,0,0.028901333610216778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,16,128,1,float16,fp8,0,0.026933332284291584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,1,128,1,float16,float16,0,0.025818665822347004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,1,128,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,1,128,1,float16,fp8,0,0.02619733413060506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,2,128,1,float16,float16,0,0.025989333788553875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,2,128,1,float16,fp8,0,0.026170666019121807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,4,128,1,float16,float16,0,0.026719999810059864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,2,128,1,fp8,fp8,0,0.027999999622503918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,4,128,1,fp8,fp8,0,0.028944000601768494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,4,128,1,float16,fp8,0,0.0269813338915507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,8,128,1,float16,float16,0,0.026560001075267792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,8,128,1,fp8,fp8,0,0.029088000456492107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,16,128,1,float16,float16,0,0.021018666525681812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,16,128,1,float16,fp8,0,0.02083733429511388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,1,128,1,float16,float16,0,0.01972266659140587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,1,128,1,float16,fp8,0,0.019946667055288952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,16,8,128,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,16,128,1,fp8,fp8,0,0.02181333303451538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,1,128,1,fp8,fp8,0,0.020256000260512035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,2,128,1,float16,float16,0,0.01969066634774208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,2,128,1,float16,fp8,0,0.020143999407688778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,2,128,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,4,128,1,float16,float16,0,0.019882666567961376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,4,128,1,float16,fp8,0,0.020053333292404812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,8,128,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,8,128,1,float16,fp8,0,0.020053333292404812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,8,128,1,float16,float16,0,0.020181333025296528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,16,128,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,16,128,1,float16,fp8,0,0.018218666315078735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,16,4,128,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,16,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,1,128,1,float16,float16,0,0.018245333184798557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,1,128,1,float16,fp8,0,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,1,128,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,2,128,1,float16,fp8,0,0.018810667097568512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,2,128,1,fp8,fp8,0,0.019861333072185516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,4,128,1,float16,fp8,0,0.018810667097568512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,2,128,1,float16,float16,0,0.01834133391578992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,4,128,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,8,128,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,16,128,1,float16,fp8,0,0.018021332720915478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,8,128,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,4,128,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,16,8,128,1,fp8,fp8,0,0.01972266659140587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,16,128,1,float16,float16,0,0.017674667139848072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,16,128,1,fp8,fp8,0,0.018746666610240936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,1,128,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,2,128,1,float16,float16,0,0.0174346665541331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,1,128,1,float16,fp8,0,0.017674667139848072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,1,128,1,float16,float16,0,0.017616000026464462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,2,128,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,4,128,1,float16,float16,0,0.017711999515692394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,2,128,1,float16,fp8,0,0.01803733284274737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,4,128,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,4,128,1,float16,fp8,0,0.018288000176350277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,8,128,1,float16,float16,0,0.01746133342385292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,16,128,1,float16,float16,0,0.016773333152135212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,8,128,1,float16,fp8,0,0.018229333062966663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,16,128,1,fp8,fp8,0,0.017952000101407368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,16,8,128,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,16,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,1,128,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,1,128,1,fp8,fp8,0,0.018053332964579265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,2,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,1,128,1,float16,fp8,0,0.017925333231687546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,2,128,1,float16,fp8,0,0.017759999881188076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,4,128,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,4,128,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,2,128,1,fp8,fp8,0,0.01821333294113477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,8,128,1,float16,float16,0,0.017162666966517765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,8,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,4,128,1,float16,float16,0,0.01747200017174085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,16,8,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,1,128,1,float16,float16,0,0.1446293294429779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,1,128,1,fp8,fp8,0,0.18387200435002646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,1,128,1,float16,fp8,0,0.14479466279347739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,2,128,1,float16,fp8,0,0.1482080022493998
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,2,128,1,fp8,fp8,0,0.18618667125701904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,2,128,1,float16,float16,0,0.14913066228230795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,4,128,1,float16,float16,0,0.1545919974644979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,4,128,1,float16,fp8,0,0.1535040040810903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,4,128,1,fp8,fp8,0,0.19573867321014404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,8,128,1,float16,float16,0,0.15837867061297098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,8,128,1,float16,fp8,0,0.15626133481661478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,16,128,1,float16,float16,0,0.08750933408737183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,16,8,128,1,fp8,fp8,0,0.1969226598739624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,16,128,1,float16,fp8,0,0.08539199829101562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,1,128,1,float16,float16,0,0.07753066718578339
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,16,128,1,fp8,fp8,0,0.1095199982325236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,1,128,1,fp8,fp8,0,0.09971200426419576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,1,128,1,float16,fp8,0,0.07696533203125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,2,128,1,float16,float16,0,0.0792746643225352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,2,128,1,fp8,fp8,0,0.10233599940935771
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,2,128,1,float16,fp8,0,0.07845333218574524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,8,128,1,float16,float16,0,0.08539733290672302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,4,128,1,float16,fp8,0,0.08192533254623413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,4,128,1,fp8,fp8,0,0.10749866565068562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,4,128,1,float16,float16,0,0.08226666847864787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,8,128,1,fp8,fp8,0,0.10829866925875346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,16,8,128,1,float16,fp8,0,0.0846506655216217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,16,128,1,float16,fp8,0,0.04650666813055674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,16,128,1,float16,float16,0,0.04737600187460581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,16,128,1,fp8,fp8,0,0.062122667829195656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,2,128,1,float16,float16,0,0.04602666695912679
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,2,128,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,1,128,1,float16,float16,0,0.04577066500981649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,2,128,1,fp8,fp8,0,0.05795733133951823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,1,128,1,fp8,fp8,0,0.05709333221117655
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,4,128,1,float16,float16,0,0.04674666623274485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,1,128,1,float16,fp8,0,0.04610666632652283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,4,128,1,float16,fp8,0,0.046816001335779824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,4,128,1,fp8,fp8,0,0.06055466830730438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,8,128,1,float16,float16,0,0.046757335464159645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,8,128,1,float16,fp8,0,0.04782933493455251
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,16,8,128,1,fp8,fp8,0,0.060218666990598045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,16,128,1,fp8,fp8,0,0.036714665591716766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,16,128,1,float16,fp8,0,0.03033066789309184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,1,128,1,float16,float16,0,0.029674666623274486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,16,128,1,float16,float16,0,0.03035733352104823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,1,128,1,float16,fp8,0,0.030346666773160298
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,1,128,1,fp8,fp8,0,0.03554133325815201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,2,128,1,float16,fp8,0,0.030405332644780476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,4,128,1,float16,float16,0,0.030906667311986286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,2,128,1,fp8,fp8,0,0.0359199990828832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,2,128,1,float16,float16,0,0.030752000709374745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,4,128,1,fp8,fp8,0,0.03738133360942205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,8,128,1,float16,float16,0,0.03109866629044215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,4,128,1,float16,fp8,0,0.031018666923046112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,8,128,1,float16,fp8,0,0.031194667021433514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,16,128,1,float16,float16,0,0.021733333667119343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,16,8,128,1,fp8,fp8,0,0.03729599962631861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,1,128,1,float16,float16,0,0.02183466653029124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,16,128,1,float16,fp8,0,0.02236266682545344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,16,128,1,fp8,fp8,0,0.025807999074459076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,2,128,1,float16,fp8,0,0.021925332645575207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,1,128,1,float16,fp8,0,0.021744000415007275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,2,128,1,float16,float16,0,0.021930667261282604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,1,128,1,fp8,fp8,0,0.02476266771554947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,4,128,1,float16,float16,0,0.0220320001244545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,4,128,1,float16,fp8,0,0.02271466702222824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,8,128,1,float16,float16,0,0.022202665607134502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,8,128,1,fp8,fp8,0,0.02566933383544286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,8,128,1,float16,fp8,0,0.022229333718617756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,16,4,128,1,fp8,fp8,0,0.025701334079106648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,16,128,1,float16,float16,0,0.017866666118303936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,16,128,1,fp8,fp8,0,0.020154666155576706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,1,128,1,float16,fp8,0,0.01783466711640358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,1,128,1,float16,float16,0,0.017680000513792038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,1,128,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,16,128,1,float16,fp8,0,0.018645333747069042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,2,128,1,float16,float16,0,0.017658667018016178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,2,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,4,128,1,float16,fp8,0,0.017957333475351334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,2,128,1,float16,fp8,0,0.018021332720915478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,4,128,1,float16,float16,0,0.017573333034912746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,8,128,1,float16,float16,0,0.017850667238235474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,4,128,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,8,128,1,fp8,fp8,0,0.020037333170572918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,16,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,16,128,1,float16,float16,0,0.016447999825080235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,16,8,128,1,float16,fp8,0,0.018085333208243053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,1,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,2,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,2,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,1,128,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,1,128,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,16,128,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,2,128,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,4,128,1,fp8,fp8,0,0.01838933303952217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,4,128,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,4,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,8,128,1,float16,float16,0,0.017322666943073273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,8,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,16,128,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,16,128,1,fp8,fp8,0,0.017829333742459614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,16,128,1,float16,float16,0,0.016645333419243496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,16,8,128,1,float16,fp8,0,0.017674667139848072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,1,128,1,float16,float16,0,0.016437333077192307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,1,128,1,fp8,fp8,0,0.018474667022625606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,2,128,1,float16,float16,0,0.016666666915019352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,2,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,2,128,1,fp8,fp8,0,0.017898666361967724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,1,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,4,128,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,4,128,1,fp8,fp8,0,0.018197332819302876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,8,128,1,float16,float16,0,0.01670933390657107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,4,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,8,128,1,fp8,fp8,0,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,16,8,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,16,128,1,float16,float16,0,0.015989333391189575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,16,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,1,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,2,128,1,float16,float16,0,0.016399999459584553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,2,128,1,fp8,fp8,0,0.0184906671444575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,16,128,1,fp8,fp8,0,0.01793066660563151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,2,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,1,128,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,4,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,4,128,1,float16,float16,0,0.016293333222468693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,4,128,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,8,128,1,float16,float16,0,0.016613333175579708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,16,8,128,1,fp8,fp8,0,0.018309333672126133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,1,128,1,float16,float16,0,0.12118400136629741
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,1,128,1,fp8,fp8,0,0.16151466965675354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,1,128,1,float16,fp8,0,0.12149866422017415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,2,128,1,float16,fp8,0,0.12201600273450215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,2,128,1,fp8,fp8,0,0.16365866859753928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,4,128,1,float16,float16,0,0.12507733702659607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,4,128,1,fp8,fp8,0,0.1675999959309896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,2,128,1,float16,float16,0,0.8309226830800375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,4,128,1,float16,fp8,0,0.12522666652997336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,8,128,1,float16,float16,0,0.1279093325138092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,16,128,1,float16,float16,0,0.06638399759928386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,16,128,1,float16,fp8,0,0.06578133503595988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,8,128,1,fp8,fp8,0,0.16895999511082968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,16,128,1,fp8,fp8,0,0.0943999985853831
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,16,8,128,1,float16,fp8,0,0.1276800036430359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,1,128,1,float16,float16,0,0.06724266707897186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,2,128,1,float16,fp8,0,0.06758399804433186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,2,128,1,float16,float16,0,0.06727466483910878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,2,128,1,fp8,fp8,0,0.09030399719874065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,1,128,1,fp8,fp8,0,0.08855467041333516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,1,128,1,float16,fp8,0,0.06776000062624614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,4,128,1,float16,float16,0,0.06858133276303609
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,4,128,1,float16,fp8,0,0.06956266860167186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,8,128,1,float16,float16,0,0.06965333223342896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,8,128,1,float16,fp8,0,0.06878933310508728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,8,128,1,fp8,fp8,0,0.09251733620961507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,16,128,1,float16,float16,0,0.03940266619126002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,16,4,128,1,fp8,fp8,0,0.09161067008972168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,16,128,1,fp8,fp8,0,0.05284800132115682
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,1,128,1,float16,float16,0,0.04093866546948751
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,16,128,1,float16,fp8,0,0.039674667020638786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,1,128,1,float16,fp8,0,0.04101333270470301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,2,128,1,float16,float16,0,0.041082667807737984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,1,128,1,fp8,fp8,0,0.051557332277297974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,2,128,1,float16,fp8,0,0.041109333435694374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,4,128,1,float16,float16,0,0.041573333243529
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,4,128,1,float16,fp8,0,0.04248000184694926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,2,128,1,fp8,fp8,0,0.05197866757710775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,8,128,1,float16,float16,0,0.041477332512537636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,8,128,1,float16,fp8,0,0.042170668641726174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,8,128,1,fp8,fp8,0,0.053413331508636475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,16,4,128,1,fp8,fp8,0,0.053258667389551796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,16,128,1,float16,float16,0,0.02661866694688797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,16,128,1,float16,fp8,0,0.027087998886903126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,1,128,1,float16,fp8,0,0.027845333019892376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,1,128,1,fp8,fp8,0,0.033402666449546814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,16,128,1,fp8,fp8,0,0.03326933334271113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,1,128,1,float16,float16,0,0.027269333600997925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,2,128,1,float16,float16,0,0.027445333699385326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,2,128,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,2,128,1,float16,fp8,0,0.027978666126728058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,4,128,1,float16,float16,0,0.02775999903678894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,4,128,1,float16,fp8,0,0.028165332973003387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,4,128,1,fp8,fp8,0,0.03396799912055334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,8,128,1,fp8,fp8,0,0.03393599887688955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,16,128,1,float16,float16,0,0.020469332734743755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,8,128,1,float16,float16,0,0.028202667832374573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,16,8,128,1,float16,fp8,0,0.02812800059715907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,1,128,1,float16,float16,0,0.020373333245515823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,16,128,1,fp8,fp8,0,0.023749334116776783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,1,128,1,float16,fp8,0,0.02089066555102666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,16,128,1,float16,fp8,0,0.020901332298914593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,1,128,1,fp8,fp8,0,0.02370133250951767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,2,128,1,float16,float16,0,0.02073066681623459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,2,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,4,128,1,float16,float16,0,0.02062400057911873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,4,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,4,128,1,fp8,fp8,0,0.02386133372783661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,2,128,1,fp8,fp8,0,0.02421333392461141
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,8,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,8,128,1,fp8,fp8,0,0.024469333390394848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,16,8,128,1,float16,float16,0,0.021040000021457672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,16,128,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,16,128,1,float16,float16,0,0.017690667261679966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,16,128,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,1,128,1,float16,float16,0,0.017429333180189133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,1,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,2,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,1,128,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,2,128,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,4,128,1,float16,fp8,0,0.017935999979575474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,4,128,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,8,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,2,128,1,float16,fp8,0,0.017743999759356182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,4,128,1,float16,float16,0,0.017530667285124462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,8,128,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,16,128,1,float16,float16,0,0.016373333831628162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,16,8,128,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,16,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,1,128,1,float16,float16,0,0.016303999970356624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,16,128,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,2,128,1,float16,float16,0,0.016528000434239704
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,1,128,1,fp8,fp8,0,0.01851733277241389
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,2,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,1,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,4,128,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,4,128,1,float16,float16,0,0.01676799977819125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,4,128,1,fp8,fp8,0,0.0185759998857975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,2,128,1,fp8,fp8,0,0.018325333793958027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,8,128,1,float16,float16,0,0.016522667060295742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,8,128,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,16,8,128,1,fp8,fp8,0,0.01798933371901512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,16,128,1,float16,fp8,0,0.016437333077192307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,16,128,1,fp8,fp8,0,0.018191999445358913
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,1,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,16,128,1,float16,float16,0,0.016016000260909397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,1,128,1,fp8,fp8,0,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,1,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,2,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,2,128,1,fp8,fp8,0,0.017909333109855652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,4,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,2,128,1,float16,float16,0,0.02046400060256322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,4,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,8,128,1,float16,float16,0,0.016677333662907284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,16,128,1,float16,float16,0,0.01598400001724561
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,8,128,1,fp8,fp8,0,0.018464000274737675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,4,128,1,fp8,fp8,0,0.01850133389234543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,16,128,1,float16,fp8,0,0.01632000009218852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,16,8,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,1,128,1,float16,float16,0,0.016234666109085083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,1,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,1,128,1,fp8,fp8,0,0.018229333062966663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,16,128,1,fp8,fp8,0,0.01810666670401891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,2,128,1,float16,float16,0,0.016165333489576977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,2,128,1,fp8,fp8,0,0.017877332866191864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,4,128,1,float16,fp8,0,0.016682667036851246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,8,128,1,float16,float16,0,0.01629866659641266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,4,128,1,float16,float16,0,0.016074666132529575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,4,128,1,fp8,fp8,0,0.018207999567190807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,2,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,8,128,1,fp8,fp8,0,0.017887999614079792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,16,8,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,128,1,float16,float16,0,12.699007670084635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,128,1,fp8,fp8,0,8.350784301757812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,128,1,float16,fp8,0,12.882245381673178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,128,1,float16,fp8,0,12.54214350382487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,128,1,float16,float16,0,12.49078369140625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,128,1,fp8,fp8,0,8.280293146769205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,128,1,float16,float16,0,12.60644276936849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,128,1,float16,fp8,0,12.47766367594401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,128,1,float16,float16,0,6.196271896362305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,128,1,float16,fp8,0,6.2797495524088545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,128,1,fp8,fp8,0,4.169386545817058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,128,1,float16,float16,0,6.345578511555989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,128,1,float16,float16,0,6.2662506103515625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,128,1,float16,fp8,0,6.28331184387207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,128,1,fp8,fp8,0,8.461082458496094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,128,1,fp8,fp8,0,4.135397275288899
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,128,1,fp8,fp8,0,4.032592137654622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,128,1,float16,fp8,0,5.987402598063151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,128,1,float16,float16,0,6.3369598388671875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,128,1,float16,float16,0,3.074000040690104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,128,1,fp8,fp8,0,4.189850807189941
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,128,1,float16,fp8,0,6.163024266560872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,128,1,float16,fp8,0,3.0580320358276367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,128,1,fp8,fp8,0,2.1124265988667807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,128,1,float16,fp8,0,3.0498879750569663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,128,1,float16,float16,0,3.082207997639974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,128,1,fp8,fp8,0,2.104229291280111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,128,1,float16,float16,0,3.1436052322387695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,128,1,float16,fp8,0,3.046880086263021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,128,1,fp8,fp8,0,2.100874741872152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,128,1,float16,fp8,0,3.1021973292032876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,128,1,float16,float16,0,1.6253973642985027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,128,1,float16,fp8,0,1.6135360399882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,128,1,float16,float16,0,3.1075572967529297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,128,1,fp8,fp8,0,2.1093600591023765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,128,1,float16,float16,0,1.644752025604248
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,128,1,float16,fp8,0,1.6400160789489746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,128,1,fp8,fp8,0,1.0809226830800374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,128,1,fp8,fp8,0,1.0679840246836345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,128,1,float16,float16,0,1.6474666595458984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,128,1,fp8,fp8,0,1.0662506421407063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,128,1,float16,fp8,0,1.629733403523763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,128,1,float16,float16,0,1.651301383972168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,128,1,float16,fp8,0,1.6401866277058919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,128,1,fp8,fp8,0,1.0775039990743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,128,1,float16,float16,0,7.329973220825195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,128,1,float16,fp8,0,7.268933614095052
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,128,1,fp8,fp8,0,4.785109202067058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,128,1,float16,float16,0,7.199253082275391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,128,1,fp8,fp8,0,4.739919980367024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,128,1,float16,fp8,0,7.226650873819987
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,128,1,float16,float16,0,7.171578725179036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,128,1,float16,fp8,0,7.132277170817058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,128,1,float16,float16,0,3.539706548055013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,128,1,float16,fp8,0,3.533935864766439
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,128,1,fp8,fp8,0,2.412048021952311
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,128,1,float16,float16,0,3.5553547541300454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,128,1,float16,fp8,0,3.4648586908976235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,128,1,float16,float16,0,3.5554558436075845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,128,1,fp8,fp8,0,4.900597254435222
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,128,1,fp8,fp8,0,2.44813871383667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,128,1,float16,fp8,0,3.492410659790039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,128,1,fp8,fp8,0,2.4129652976989746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,128,1,float16,float16,0,3.5780747731526694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,128,1,float16,float16,0,1.8057492574055989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,128,1,float16,fp8,0,3.613925298055013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,128,1,float16,fp8,0,1.8239146868387859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,128,1,fp8,fp8,0,1.2935840288798015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,128,1,fp8,fp8,0,2.4309706687927246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,128,1,float16,float16,0,1.8211520512898762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,128,1,float16,fp8,0,1.8020213445027669
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,128,1,fp8,fp8,0,1.2736106713612874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,128,1,float16,float16,0,1.8236692746480305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,128,1,fp8,fp8,0,1.2805493672688801
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,128,1,float16,fp8,0,1.8254720369974773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,128,1,float16,float16,0,1.8366506894429524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,128,1,fp8,fp8,0,1.284864028294881
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,128,1,float16,fp8,0,1.8171680768330891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,128,1,float16,float16,0,0.9873440265655518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,128,1,float16,fp8,0,0.99727463722229
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,128,1,float16,float16,0,1.0073973337809246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,128,1,float16,fp8,0,0.9851306279500326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,128,1,fp8,fp8,0,0.6584959824879965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,128,1,fp8,fp8,0,0.6510773499806722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,128,1,float16,float16,0,1.0042773087819417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,128,1,float16,fp8,0,0.9968213240305582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,128,1,fp8,fp8,0,0.6523413260777792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,128,1,float16,float16,0,1.0119199752807617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,128,1,float16,fp8,0,1.0056373278299968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,128,1,fp8,fp8,0,0.6597493489583334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,128,1,float16,float16,0,4.84390385945638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,128,1,float16,fp8,0,5.02730147043864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,128,1,fp8,fp8,0,3.3932692209879556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,128,1,fp8,fp8,0,3.4308907190958657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,128,1,float16,float16,0,5.1138614018758135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,128,1,float16,fp8,0,4.94595209757487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,128,1,float16,float16,0,5.008261362711589
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,128,1,float16,fp8,0,5.017871856689453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,128,1,float16,float16,0,2.5091892878214517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,128,1,float16,float16,0,2.4911039670308432
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,128,1,float16,fp8,0,2.4606986045837402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,128,1,fp8,fp8,0,1.756922721862793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,128,1,fp8,fp8,0,3.4243946075439453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,128,1,float16,float16,0,2.541722615559896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,128,1,float16,fp8,0,2.4952267011006675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,128,1,fp8,fp8,0,1.7949760754903157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,128,1,float16,fp8,0,2.484586715698242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,128,1,float16,fp8,0,2.5134453773498535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,128,1,fp8,fp8,0,1.7752000490824382
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,128,1,float16,float16,0,1.3114133675893147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,128,1,fp8,fp8,0,1.7581173578898113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,128,1,float16,float16,0,2.4943466186523438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,128,1,float16,fp8,0,1.3036426703135173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,128,1,fp8,fp8,0,0.9192480246225992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,128,1,float16,float16,0,1.3230773607889812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,128,1,float16,fp8,0,1.317845344543457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,128,1,fp8,fp8,0,0.8864426612854004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,128,1,float16,float16,0,1.3202773729960124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,128,1,float16,fp8,0,1.3118666807810466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,128,1,fp8,fp8,0,0.8882506688435873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,128,1,float16,fp8,0,1.3253493309020996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,128,1,float16,float16,0,1.3239733378092449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,128,1,fp8,fp8,0,0.9025546709696451
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,128,1,float16,float16,0,0.662277340888977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,128,1,float16,fp8,0,0.6494506597518921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,128,1,fp8,fp8,0,0.48775466283162433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,128,1,fp8,fp8,0,0.48046398162841797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,128,1,float16,fp8,0,0.6502559979756674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,128,1,float16,float16,0,0.6562720139821371
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,128,1,float16,float16,0,0.6543840169906616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,128,1,float16,fp8,0,0.6463306744893392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,128,1,fp8,fp8,0,0.4832586844762166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,128,1,float16,float16,0,0.6593279838562012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,128,1,float16,fp8,0,0.6515200138092041
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,128,1,fp8,fp8,0,0.4864799976348877
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,128,1,float16,float16,0,6.582400004069011
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,128,1,float16,fp8,0,6.403327941894531
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,128,1,fp8,fp8,0,4.541514714558919
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,128,1,fp8,fp8,0,4.520021438598633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,128,1,float16,fp8,0,6.515157063802083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,128,1,float16,float16,0,6.498581568400065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,128,1,float16,float16,0,6.618794759114583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,128,1,float16,fp8,0,6.750282923380534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,128,1,float16,float16,0,3.26639461517334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,128,1,float16,fp8,0,3.2472960154215493
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,128,1,fp8,fp8,0,2.3035839398701987
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,128,1,float16,float16,0,3.2717758814493814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,128,1,float16,float16,0,3.2178611755371094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,128,1,fp8,fp8,0,4.574794769287109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,128,1,fp8,fp8,0,2.3653546969095864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,128,1,fp8,fp8,0,2.3135040601094565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,128,1,float16,fp8,0,3.24454402923584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,128,1,float16,fp8,0,3.2280747095743814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,128,1,float16,float16,0,3.2884480158487954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,128,1,float16,float16,0,1.684981346130371
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,128,1,float16,fp8,0,3.3308318456014
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,128,1,float16,fp8,0,1.6549919446309407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,128,1,fp8,fp8,0,2.327477296193441
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,128,1,float16,fp8,0,1.666368007659912
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,128,1,float16,float16,0,1.685695966084798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,128,1,fp8,fp8,0,1.2363573710123699
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,128,1,fp8,fp8,0,1.2000319957733154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,128,1,float16,float16,0,1.6882079442342122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,128,1,float16,fp8,0,1.6725600560506184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,128,1,fp8,fp8,0,1.2053653399149578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,128,1,float16,fp8,0,1.6635252634684246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,128,1,fp8,fp8,0,1.2162559827168782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,128,1,float16,float16,0,1.6760692596435547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,128,1,float16,fp8,0,0.8907039960225424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,128,1,float16,float16,0,0.8935840129852295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,128,1,float16,float16,0,0.9031946659088135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,128,1,fp8,fp8,0,0.6342080036799113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,128,1,float16,fp8,0,0.8892959753672282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,128,1,fp8,fp8,0,0.6117813189824423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,128,1,float16,float16,0,0.902885357538859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,128,1,fp8,fp8,0,0.6145973205566406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,128,1,float16,float16,0,0.9008853435516357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,128,1,fp8,fp8,0,0.6217600107192993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,128,1,float16,fp8,0,0.9000053405761719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,128,1,float16,float16,0,0.44788265228271484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,128,1,float16,fp8,0,0.8900746504465739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,128,1,float16,float16,0,0.4456319808959961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,128,1,float16,fp8,0,0.44356799125671387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,128,1,fp8,fp8,0,0.34563199679056805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,128,1,float16,fp8,0,0.43957332770029706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,128,1,float16,float16,0,0.4479413429896037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,128,1,fp8,fp8,0,0.33663467566172284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,128,1,float16,fp8,0,0.44092798233032227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,128,1,fp8,fp8,0,0.3365120093027751
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,128,1,float16,float16,0,0.4485599994659424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,128,1,float16,fp8,0,0.4436746835708618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,128,1,fp8,fp8,0,0.341045339902242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,128,1,float16,float16,0,3.871781349182129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,128,1,fp8,fp8,0,2.7709760665893555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,128,1,float16,fp8,0,3.7812105814615884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,128,1,float16,float16,0,3.846816062927246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,128,1,fp8,fp8,0,2.790442784627279
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,128,1,float16,fp8,0,3.8568747838338218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,128,1,float16,fp8,0,3.8015947341918945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,128,1,float16,float16,0,3.8835573196411133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,128,1,float16,float16,0,1.9361707369486492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,128,1,float16,fp8,0,1.9061120351155598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,128,1,float16,float16,0,1.9798293113708496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,128,1,fp8,fp8,0,1.423258622487386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,128,1,float16,float16,0,1.937546730041504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,128,1,fp8,fp8,0,2.8068319956461587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,128,1,float16,fp8,0,1.9673333168029785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,128,1,fp8,fp8,0,1.4784960746765137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,128,1,float16,fp8,0,1.9173386891682942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,128,1,float16,float16,0,1.9725386301676433
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,128,1,float16,float16,0,1.0140586694081624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,128,1,fp8,fp8,0,1.4271094004313152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,128,1,fp8,fp8,0,1.4466026624043782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,128,1,float16,fp8,0,1.9396053949991863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,128,1,float16,float16,0,1.01910400390625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,128,1,float16,fp8,0,1.0273760159810383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,128,1,float16,fp8,0,1.0101173718770344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,128,1,fp8,fp8,0,0.7509760061899821
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,128,1,fp8,fp8,0,0.7853333155314127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,128,1,float16,fp8,0,1.0062399705251057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,128,1,fp8,fp8,0,0.7552853425343832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,128,1,float16,float16,0,1.0155519644419353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,128,1,float16,float16,0,1.020522673924764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,128,1,float16,fp8,0,1.0200533072153728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,128,1,float16,float16,0,0.56004265944163
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,128,1,float16,float16,0,0.558357318242391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,128,1,fp8,fp8,0,0.38648533821105957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,128,1,fp8,fp8,0,0.7635146776835123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,128,1,float16,fp8,0,0.555951992670695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,128,1,fp8,fp8,0,0.4047573407491048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,128,1,float16,fp8,0,0.5497653484344482
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,128,1,float16,float16,0,0.5622613430023193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,128,1,float16,fp8,0,0.5556159814198812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,128,1,fp8,fp8,0,0.3889760176340739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,128,1,float16,float16,0,0.5635519822438558
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,128,1,float16,fp8,0,0.5588639974594116
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,128,1,float16,float16,0,0.2845919926961263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,128,1,fp8,fp8,0,0.22899732987085977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,128,1,fp8,fp8,0,0.3944053252538045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,128,1,float16,float16,0,0.28201067447662354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,128,1,float16,fp8,0,0.28067199389139813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,128,1,float16,fp8,0,0.2779253323872884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,128,1,fp8,fp8,0,0.21850132942199707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,128,1,float16,float16,0,0.2828693389892578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,128,1,fp8,fp8,0,0.22221867243448892
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,128,1,float16,fp8,0,0.27778132756551105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,128,1,float16,float16,0,0.2832000056902568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,128,1,float16,fp8,0,0.2815200090408325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,128,1,fp8,fp8,0,0.22353067000706991
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,128,1,float16,float16,0,3.7195679346720376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,128,1,float16,fp8,0,3.721013387044271
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,128,1,fp8,fp8,0,2.828330675760905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,128,1,float16,fp8,0,3.71617062886556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,128,1,float16,float16,0,3.7607574462890625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,128,1,fp8,fp8,0,2.842288017272949
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,128,1,float16,float16,0,3.7788480122884116
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,128,1,float16,float16,0,1.8967679341634114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,128,1,float16,fp8,0,3.750234603881836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,128,1,float16,fp8,0,1.8520800272623699
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,128,1,fp8,fp8,0,1.4432427088419597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,128,1,float16,fp8,0,1.9306880633036296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,128,1,fp8,fp8,0,2.87938658396403
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,128,1,fp8,fp8,0,1.5225653648376465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,128,1,float16,float16,0,1.9173280398050945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,128,1,float16,float16,0,1.9052480061848958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,128,1,float16,fp8,0,1.8555946350097656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,128,1,fp8,fp8,0,1.4522347450256348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,128,1,float16,float16,0,1.911354700724284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,128,1,float16,float16,0,0.9706933498382568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,128,1,float16,fp8,0,0.95686936378479
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,128,1,float16,float16,0,0.9899360338846842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,128,1,float16,fp8,0,1.893781344095866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,128,1,fp8,fp8,0,0.7904853026072184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,128,1,fp8,fp8,0,1.4726932843526204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,128,1,float16,fp8,0,0.9836053053538004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,128,1,fp8,fp8,0,0.7500053246816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,128,1,float16,fp8,0,0.9614880084991455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,128,1,float16,float16,0,0.9744586944580078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,128,1,fp8,fp8,0,0.7531786759694418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,128,1,float16,float16,0,0.9877653121948242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,128,1,float16,float16,0,0.5287893215815226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,128,1,float16,fp8,0,0.9755040009816488
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,128,1,fp8,fp8,0,0.7630986372629801
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,128,1,float16,fp8,0,0.5289280017217001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,128,1,float16,fp8,0,0.5158133506774902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,128,1,float16,float16,0,0.5214399894078573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,128,1,fp8,fp8,0,0.38365332285563153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,128,1,float16,fp8,0,0.5178666512171427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,128,1,fp8,fp8,0,0.4131466547648112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,128,1,fp8,fp8,0,0.3863573471705119
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,128,1,float16,float16,0,0.5254293282826742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,128,1,float16,float16,0,0.5293279886245728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,128,1,float16,fp8,0,0.5216799974441528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,128,1,fp8,fp8,0,0.3907253344853719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,128,1,float16,float16,0,0.26318933566411334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,128,1,float16,float16,0,0.2737226684888204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,128,1,fp8,fp8,0,0.21238400538762411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,128,1,float16,fp8,0,0.25915199518203735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,128,1,float16,fp8,0,0.27080533901850384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,128,1,fp8,fp8,0,0.2249280015627543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,128,1,float16,float16,0,0.2651573419570923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,128,1,fp8,fp8,0,0.214410662651062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,128,1,float16,float16,0,0.26793599128723145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,128,1,float16,fp8,0,0.25968533754348755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,128,1,float16,fp8,0,0.263264000415802
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,128,1,fp8,fp8,0,0.21679999430974325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,128,1,float16,float16,0,0.15999466180801392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,128,1,float16,fp8,0,0.15755732854207358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,128,1,fp8,fp8,0,0.1346773306528727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,128,1,float16,float16,0,0.15461333592732748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,128,1,float16,fp8,0,0.15364799896876016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,128,1,float16,fp8,0,0.1539253294467926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,128,1,fp8,fp8,0,0.12618133425712585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,128,1,float16,float16,0,0.15662399927775064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,128,1,fp8,fp8,0,0.12363732854525249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,128,1,float16,fp8,0,0.15527466932932535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,128,1,float16,float16,0,0.15667200088500977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,128,1,fp8,fp8,0,0.1292746663093567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,128,1,float16,float16,0,2.325594743092855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,128,1,fp8,fp8,0,1.8218933741251628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,128,1,float16,fp8,0,2.2507413228352866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,128,1,fp8,fp8,0,1.8304853439331055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,128,1,float16,float16,0,2.3136693636576333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,128,1,float16,fp8,0,2.2603519757588706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,128,1,float16,float16,0,2.344202677408854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,128,1,float16,fp8,0,2.2883946100870767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,128,1,float16,float16,0,1.1735573609670003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,128,1,float16,fp8,0,1.1458133061726887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,128,1,fp8,fp8,0,0.9316373666127523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,128,1,float16,float16,0,1.1750880082448323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,128,1,float16,float16,0,1.2194186846415203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,128,1,fp8,fp8,0,1.8673653602600098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,128,1,float16,fp8,0,1.1929813226064045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,128,1,fp8,fp8,0,0.9991093476613363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,128,1,float16,fp8,0,1.1567893028259277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,128,1,fp8,fp8,0,0.9344426790873209
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,128,1,float16,float16,0,1.186575969060262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,128,1,float16,fp8,0,1.164581298828125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,128,1,float16,float16,0,0.6344853242238363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,128,1,float16,fp8,0,0.6242080132166544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,128,1,fp8,fp8,0,0.5262293418248495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,128,1,fp8,fp8,0,0.9538026650746664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,128,1,float16,float16,0,0.6172800064086914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,128,1,fp8,fp8,0,0.4904959996541341
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,128,1,float16,fp8,0,0.6010666688283285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,128,1,float16,float16,0,0.6167786518732706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,128,1,float16,fp8,0,0.6075520118077596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,128,1,fp8,fp8,0,0.49158398310343426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,128,1,float16,float16,0,0.6216586828231812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,128,1,float16,fp8,0,0.6136853297551473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,128,1,fp8,fp8,0,0.50219198067983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,128,1,float16,float16,0,0.3455626567204793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,128,1,float16,float16,0,0.3364959955215454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,128,1,float16,fp8,0,0.3300480047861735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,128,1,float16,fp8,0,0.3408213456471761
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,128,1,fp8,fp8,0,0.2532426714897156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,128,1,float16,float16,0,0.3391306797663371
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,128,1,fp8,fp8,0,0.27615465720494586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,128,1,float16,fp8,0,0.33453865845998126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,128,1,fp8,fp8,0,0.2557493249575297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,128,1,float16,float16,0,0.34187201658884686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,128,1,float16,fp8,0,0.33590400218963623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,128,1,float16,float16,0,0.18076799313227335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,128,1,float16,float16,0,0.17324266831080118
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,128,1,float16,fp8,0,0.17921600739161173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,128,1,fp8,fp8,0,0.25853333870569867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,128,1,float16,fp8,0,0.17013333241144815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,128,1,fp8,fp8,0,0.15491732954978943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,128,1,fp8,fp8,0,0.14342400431632996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,128,1,float16,float16,0,0.17358400424321493
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,128,1,float16,fp8,0,0.1710240046183268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,128,1,fp8,fp8,0,0.1462559998035431
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,128,1,float16,float16,0,0.17587200800577799
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,128,1,fp8,fp8,0,0.14735999703407288
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,128,1,float16,float16,0,0.10682132840156555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,128,1,float16,fp8,0,0.17164266109466553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,128,1,float16,fp8,0,0.10663466652234395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,128,1,fp8,fp8,0,0.09404266873995464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,128,1,float16,float16,0,0.10347200433413188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,128,1,float16,fp8,0,0.10246933499972026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,128,1,fp8,fp8,0,0.08668266733487447
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,128,1,float16,float16,0,0.10382933417956035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,128,1,float16,fp8,0,0.10228266318639119
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,128,1,float16,float16,0,0.10410133004188538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,128,1,fp8,fp8,0,0.08685866991678874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,128,1,float16,fp8,0,0.10341333349545796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,128,1,fp8,fp8,0,0.08756267031033833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,128,1,fp8,fp8,0,2.016277313232422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,128,1,float16,fp8,0,2.3332746823628745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,128,1,float16,float16,0,2.4419466654459634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,128,1,float16,float16,0,2.456912040710449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,128,1,float16,fp8,0,2.350005308787028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,128,1,fp8,fp8,0,2.022768020629883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,128,1,float16,float16,0,2.4792960484822593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,128,1,float16,fp8,0,2.3671253522237143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,128,1,float16,float16,0,1.2267733414967854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,128,1,float16,fp8,0,1.1805493036905925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,128,1,fp8,fp8,0,1.0150612990061443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,128,1,float16,float16,0,1.2317492961883545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,128,1,fp8,fp8,0,2.057408014933268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,128,1,float16,fp8,0,1.2533120314280193
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,128,1,fp8,fp8,0,1.1069066524505615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,128,1,float16,fp8,0,1.192250649134318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,128,1,float16,float16,0,1.3020693461100261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,128,1,fp8,fp8,0,1.0224800109863281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,128,1,float16,float16,0,0.6276106834411621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,128,1,float16,float16,0,1.2463359832763672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,128,1,float16,fp8,0,1.2034719785054524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,128,1,float16,float16,0,0.656496008237203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,128,1,float16,fp8,0,0.6461600065231323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,128,1,fp8,fp8,0,0.5734613339106241
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,128,1,fp8,fp8,0,1.045898675918579
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,128,1,fp8,fp8,0,0.5237546761830648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,128,1,float16,fp8,0,0.6107946634292603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,128,1,float16,fp8,0,0.6161119937896729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,128,1,float16,float16,0,0.6295679807662964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,128,1,fp8,fp8,0,0.5295519828796387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,128,1,float16,float16,0,0.3356800079345703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,128,1,float16,float16,0,0.6354346672693888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,128,1,float16,fp8,0,0.6246879895528158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,128,1,fp8,fp8,0,0.5396426518758138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,128,1,float16,float16,0,0.351306676864624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,128,1,fp8,fp8,0,0.3020373384157817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,128,1,float16,fp8,0,0.32977600892384845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,128,1,fp8,fp8,0,0.2701386610666911
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,128,1,float16,float16,0,0.33744533856709796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,128,1,float16,fp8,0,0.33195199569066364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,128,1,float16,fp8,0,0.3447200059890747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,128,1,fp8,fp8,0,0.2733280062675476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,128,1,float16,float16,0,0.3417653242746989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,128,1,float16,fp8,0,0.33446399370829266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,128,1,float16,fp8,0,0.16987733046213785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,128,1,float16,float16,0,0.1721280018488566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,128,1,fp8,fp8,0,0.2794826626777649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,128,1,float16,float16,0,0.18476800123850504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,128,1,float16,fp8,0,0.18348799149195352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,128,1,fp8,fp8,0,0.16525333126386008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,128,1,fp8,fp8,0,0.150325338045756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,128,1,float16,float16,0,0.1738133430480957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,128,1,float16,fp8,0,0.17145599921544394
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,128,1,float16,float16,0,0.1771786610285441
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,128,1,float16,fp8,0,0.17309866348902384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,128,1,float16,float16,0,0.1053706705570221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,128,1,fp8,fp8,0,0.1548373301823934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,128,1,fp8,fp8,0,0.15134933590888977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,128,1,float16,fp8,0,0.10446400443712871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,128,1,fp8,fp8,0,0.09718933701515198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,128,1,float16,float16,0,0.09929066896438599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,128,1,float16,fp8,0,0.09773866335550944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,128,1,fp8,fp8,0,0.08468799789746602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,128,1,float16,float16,0,0.09947199622790019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,128,1,float16,fp8,0,0.09829333424568176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,128,1,float16,float16,0,0.10037866234779358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,128,1,float16,fp8,0,0.0990773340066274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,128,1,fp8,fp8,0,0.0874079962571462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,128,1,fp8,fp8,0,0.08888000249862671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,128,1,float16,float16,0,0.06589866677920024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,128,1,float16,float16,0,0.06377066671848297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,128,1,float16,fp8,0,0.06497600177923839
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,128,1,fp8,fp8,0,0.059706668059031166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,128,1,float16,fp8,0,0.06354133288065593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,128,1,fp8,fp8,0,0.05604266623655955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,128,1,float16,fp8,0,0.06369066735108693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,128,1,float16,float16,0,0.06397866706053416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,128,1,fp8,fp8,0,0.056458666920661926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,128,1,float16,float16,0,0.06471466521422069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,128,1,float16,fp8,0,0.0634933312733968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,128,1,fp8,fp8,0,0.05709333221117655
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,128,1,float16,float16,0,1.5620053609212239
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,128,1,float16,fp8,0,1.5071199735005696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,128,1,fp8,fp8,0,1.3345813751220703
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,128,1,float16,float16,0,1.564458688100179
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,128,1,fp8,fp8,0,1.3613653182983398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,128,1,float16,fp8,0,1.5191787083943684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,128,1,float16,fp8,0,1.5571413040161133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,128,1,float16,float16,0,1.5898186365763347
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,128,1,float16,float16,0,0.79258131980896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,128,1,float16,fp8,0,0.7724853356679281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,128,1,fp8,fp8,0,0.6856799920399984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,128,1,float16,float16,0,0.8389973640441895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,128,1,float16,float16,0,0.7970826625823975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,128,1,float16,fp8,0,0.8284320036570231
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,128,1,fp8,fp8,0,1.391808032989502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,128,1,fp8,fp8,0,0.7586773236592611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,128,1,fp8,fp8,0,0.696773370107015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,128,1,float16,fp8,0,0.7769066492716471
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,128,1,float16,float16,0,0.8122239907582601
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,128,1,float16,float16,0,0.41521600882212323
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,128,1,float16,fp8,0,0.4043733278910319
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,128,1,float16,fp8,0,0.7923786640167236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,128,1,fp8,fp8,0,0.7090826829274496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,128,1,float16,float16,0,0.43907201290130615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,128,1,float16,fp8,0,0.43297600746154785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,128,1,fp8,fp8,0,0.39629332224527997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,128,1,fp8,fp8,0,0.36002135276794434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,128,1,float16,float16,0,0.41859734058380127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,128,1,float16,fp8,0,0.406549334526062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,128,1,fp8,fp8,0,0.3632853428522746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,128,1,float16,float16,0,0.4218133290608724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,128,1,float16,fp8,0,0.4156586726506551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,128,1,fp8,fp8,0,0.3712853193283081
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,128,1,float16,float16,0,0.22502932945887247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,128,1,float16,fp8,0,0.22005865971247354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,128,1,float16,fp8,0,0.23628799120585123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,128,1,fp8,fp8,0,0.187882661819458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,128,1,float16,float16,0,0.23689599831899008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,128,1,float16,float16,0,0.2273226579030355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,128,1,fp8,fp8,0,0.21109867095947266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,128,1,float16,fp8,0,0.2226453423500061
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,128,1,fp8,fp8,0,0.1913706660270691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,128,1,float16,float16,0,0.22961066166559854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,128,1,float16,float16,0,0.11798399686813354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,128,1,float16,fp8,0,0.12818132837613425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,128,1,float16,fp8,0,0.22719999154408774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,128,1,fp8,fp8,0,0.194106658299764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,128,1,fp8,fp8,0,0.11981866757074992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,128,1,float16,float16,0,0.12813867131868997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,128,1,float16,fp8,0,0.11683199803034465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,128,1,fp8,fp8,0,0.10714667042096455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,128,1,float16,float16,0,0.11947199702262878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,128,1,float16,fp8,0,0.11849066615104675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,128,1,float16,float16,0,0.12159466743469238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,128,1,fp8,fp8,0,0.10930132865905762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,128,1,float16,fp8,0,0.12060266733169556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,128,1,fp8,fp8,0,0.11126933495203654
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,128,1,float16,float16,0,0.07663999994595845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,128,1,float16,float16,0,0.0717493345340093
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,128,1,float16,fp8,0,0.07558933397134145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,128,1,float16,fp8,0,0.07028799752394359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,128,1,fp8,fp8,0,0.06367999811967213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,128,1,fp8,fp8,0,0.07270933190981548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,128,1,float16,float16,0,0.07188799977302551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,128,1,fp8,fp8,0,0.06378133098284404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,128,1,float16,float16,0,0.07173333565394084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,128,1,float16,fp8,0,0.07115733126799266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,128,1,float16,fp8,0,0.07072000205516815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,128,1,float16,float16,0,0.05329599976539612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,128,1,fp8,fp8,0,0.06503466765085857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,128,1,fp8,fp8,0,0.04823466638724009
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,128,1,float16,fp8,0,0.05261866748332977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,128,1,float16,fp8,0,0.05146666864554087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,128,1,fp8,fp8,0,0.04634666442871094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,128,1,float16,float16,0,0.052005335688591
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,128,1,float16,fp8,0,0.05184000233809153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,128,1,fp8,fp8,0,0.04655999938646952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,128,1,float16,float16,0,0.052426666021347046
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,128,1,float16,fp8,0,0.0517546683549881
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,128,1,float16,float16,0,0.052042668064435325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,128,1,fp8,fp8,0,0.046800002455711365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,128,1,float16,float16,0,1.4898239771525066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,128,1,fp8,fp8,0,1.312666654586792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,128,1,float16,fp8,0,1.4758346875508626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,128,1,float16,float16,0,1.4925012588500977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,128,1,float16,fp8,0,1.491194725036621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,128,1,fp8,fp8,0,1.3949119249979656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,128,1,float16,fp8,0,1.6054612795511882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,128,1,float16,float16,0,1.5782666206359863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,128,1,float16,float16,0,0.7549013296763102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,128,1,float16,fp8,0,0.7559413115183512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,128,1,fp8,fp8,0,0.6645013491312662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,128,1,float16,float16,0,0.7630346616109213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,128,1,float16,fp8,0,0.8530240058898926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,128,1,float16,float16,0,0.8741333484649658
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,128,1,fp8,fp8,0,1.5020480155944824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,128,1,fp8,fp8,0,0.7741386890411377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,128,1,float16,fp8,0,0.7616533438364664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,128,1,fp8,fp8,0,0.7060533364613851
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,128,1,float16,float16,0,0.3922826846440633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,128,1,float16,fp8,0,0.3930986722310384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,128,1,float16,float16,0,0.7994666894276937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,128,1,float16,float16,0,0.44495999813079834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,128,1,float16,fp8,0,0.7986346880594889
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,128,1,fp8,fp8,0,0.7627519766489664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,128,1,float16,fp8,0,0.4362026850382487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,128,1,fp8,fp8,0,0.4017759958902995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,128,1,fp8,fp8,0,0.34510934352874756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,128,1,float16,float16,0,0.39935465653737384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,128,1,float16,fp8,0,0.3959680000940959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,128,1,fp8,fp8,0,0.3647093375523885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,128,1,float16,fp8,0,0.4078933397928874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,128,1,float16,float16,0,0.4098079999287923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,128,1,fp8,fp8,0,0.39560532569885254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,128,1,float16,float16,0,0.23732799291610718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,128,1,float16,float16,0,0.21125332514444986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,128,1,float16,fp8,0,0.23323200146357217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,128,1,fp8,fp8,0,0.20801599820454916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,128,1,float16,fp8,0,0.2121493419011434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,128,1,fp8,fp8,0,0.17926400899887085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,128,1,float16,fp8,0,0.21326400836308798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,128,1,fp8,fp8,0,0.18339733282725015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,128,1,float16,float16,0,0.22109333674112955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,128,1,float16,float16,0,0.2136426568031311
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,128,1,float16,fp8,0,0.2195146679878235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,128,1,float16,float16,0,0.13079466422398886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,128,1,float16,float16,0,0.11231999595959981
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,128,1,float16,fp8,0,0.12779733538627625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,128,1,fp8,fp8,0,0.2018186648686727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,128,1,float16,fp8,0,0.11196800072987874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,128,1,fp8,fp8,0,0.11173866192499797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,128,1,float16,float16,0,0.11382933457692464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,128,1,fp8,fp8,0,0.09813867012659709
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,128,1,fp8,fp8,0,0.10126399993896484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,128,1,float16,fp8,0,0.1132533351580302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,128,1,float16,float16,0,0.11724799871444702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,128,1,float16,fp8,0,0.11703466375668843
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,128,1,fp8,fp8,0,0.10771200060844421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,128,1,float16,float16,0,0.06990399956703186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,128,1,float16,fp8,0,0.06959466636180878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,128,1,float16,float16,0,0.06195199986298879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,128,1,float16,fp8,0,0.06246933341026306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,128,1,fp8,fp8,0,0.057029331723848976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,128,1,fp8,fp8,0,0.06566399832566579
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,128,1,float16,float16,0,0.06294933458169301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,128,1,float16,fp8,0,0.06297066807746887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,128,1,fp8,fp8,0,0.0590826670328776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,128,1,float16,float16,0,0.06483733157316844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,128,1,float16,fp8,0,0.06493866443634033
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,128,1,fp8,fp8,0,0.06367466847101848
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,128,1,float16,float16,0,0.04093866546948751
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,128,1,float16,fp8,0,0.03823466598987579
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,128,1,float16,float16,0,0.03844800094763438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,128,1,fp8,fp8,0,0.03986666599909464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,128,1,float16,fp8,0,0.04091199984153112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,128,1,fp8,fp8,0,0.03703466554482778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,128,1,fp8,fp8,0,0.03739733248949051
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,128,1,float16,fp8,0,0.03938133269548416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,128,1,float16,float16,0,0.03920533259709676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,128,1,float16,fp8,0,0.03852266569932302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,128,1,float16,float16,0,0.03857066730658213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,128,1,float16,fp8,0,0.03629333277543386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,128,1,fp8,fp8,0,0.03948266555865606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,128,1,float16,float16,0,0.03589333345492681
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,128,1,float16,float16,0,0.03469866762558619
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,128,1,float16,fp8,0,0.03442133218050003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,128,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,128,1,fp8,fp8,0,0.030591999491055805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,128,1,float16,float16,0,0.03490666548411051
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,128,1,float16,fp8,0,0.03505066782236099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,128,1,fp8,fp8,0,0.031194667021433514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,128,1,float16,fp8,0,0.03568533311287562
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,128,1,fp8,fp8,0,0.031658666829268135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,128,1,float16,float16,0,0.03521066655715307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,12,1,128,1,float16,float16,0,1.1714346408843994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,12,1,128,1,float16,fp8,0,1.1445226669311523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,12,1,128,1,fp8,fp8,0,1.0890133380889893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,12,2,128,1,float16,float16,0,1.1964266300201416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,12,2,128,1,fp8,fp8,0,1.1549066702524822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,12,2,128,1,float16,fp8,0,1.1972586313883464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,12,4,128,1,float16,float16,0,1.2712106704711914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,12,4,128,1,float16,fp8,0,1.2586133480072021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,1,128,1,float16,float16,0,0.586736003557841
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,1,128,1,float16,fp8,0,0.5898933410644531
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,1,128,1,fp8,fp8,0,0.5491520166397095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,12,128,1,float16,float16,0,0.7038559913635254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,2,128,1,float16,float16,0,0.5994240045547485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,12,4,128,1,fp8,fp8,0,1.2684373060862224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,12,128,1,float16,fp8,0,0.6858133474985758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,12,128,1,fp8,fp8,0,0.6585973501205444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,2,128,1,float16,fp8,0,0.5945440133412679
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,2,128,1,fp8,fp8,0,0.5922346512476603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,4,128,1,float16,float16,0,0.63919464747111
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,1,128,1,float16,float16,0,0.30617600679397583
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,1,128,1,float16,fp8,0,0.30722665786743164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,4,128,1,float16,fp8,0,0.6335786581039429
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,12,128,1,float16,float16,0,0.36100268363952637
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,12,128,1,fp8,fp8,0,0.33883734544118244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,12,4,128,1,fp8,fp8,0,0.6433279911677042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,12,128,1,float16,fp8,0,0.35096534093221027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,1,128,1,fp8,fp8,0,0.28592000404993695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,2,128,1,float16,float16,0,0.3126399914423625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,2,128,1,float16,fp8,0,0.31175466378529865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,2,128,1,fp8,fp8,0,0.3027413288752238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,4,128,1,float16,float16,0,0.32788799206415814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,4,128,1,float16,fp8,0,0.32207467158635456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,12,128,1,float16,float16,0,0.19292267163594565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,12,128,1,float16,fp8,0,0.18874667088190714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,1,128,1,float16,float16,0,0.16781866550445557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,12,4,128,1,fp8,fp8,0,0.3316906690597534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,12,128,1,fp8,fp8,0,0.17708265781402588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,1,128,1,float16,fp8,0,0.1660426656405131
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,1,128,1,fp8,fp8,0,0.15290666619936624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,2,128,1,float16,float16,0,0.16845866044362387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,2,128,1,fp8,fp8,0,0.15828266739845276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,2,128,1,float16,fp8,0,0.1681600014368693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,4,128,1,float16,float16,0,0.17755200465520224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,4,128,1,fp8,fp8,0,0.17417067289352417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,12,4,128,1,float16,fp8,0,0.1739520033200582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,12,128,1,float16,float16,0,0.11242666840553284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,12,128,1,float16,fp8,0,0.10890133182207744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,1,128,1,float16,float16,0,0.09130666653315227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,1,128,1,float16,fp8,0,0.09004799524943034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,12,128,1,fp8,fp8,0,0.09697066744168599
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,1,128,1,fp8,fp8,0,0.08546132842699687
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,2,128,1,float16,float16,0,0.09195199608802795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,2,128,1,fp8,fp8,0,0.08819199601809184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,2,128,1,float16,fp8,0,0.09150933225949605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,4,128,1,float16,float16,0,0.09710933764775594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,4,128,1,float16,fp8,0,0.09564266602198283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,12,4,128,1,fp8,fp8,0,0.09430932998657227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,12,128,1,fp8,fp8,0,0.05778666834036509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,12,128,1,float16,float16,0,0.06063466767470042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,12,128,1,float16,fp8,0,0.05985599756240845
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,1,128,1,float16,float16,0,0.052239999175071716
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,1,128,1,fp8,fp8,0,0.04906666775544485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,2,128,1,float16,float16,0,0.05219733218352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,2,128,1,float16,fp8,0,0.0529013325770696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,1,128,1,float16,fp8,0,0.05179733534653982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,2,128,1,fp8,fp8,0,0.05004266897837321
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,4,128,1,float16,float16,0,0.054858664671579994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,4,128,1,float16,fp8,0,0.05530133346716563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,12,4,128,1,fp8,fp8,0,0.05533333122730255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,1,128,1,float16,fp8,0,0.033946665624777474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,12,128,1,float16,float16,0,0.036650667587916054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,12,128,1,float16,fp8,0,0.03693866729736328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,12,128,1,fp8,fp8,0,0.03640533238649368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,1,128,1,fp8,fp8,0,0.03379733363787333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,2,128,1,float16,float16,0,0.03469333300987879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,1,128,1,float16,float16,0,0.033600000043710075
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,2,128,1,float16,fp8,0,0.03454400102297465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,2,128,1,fp8,fp8,0,0.03403199960788091
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,4,128,1,float16,float16,0,0.03498133271932602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,4,128,1,fp8,fp8,0,0.0354720006386439
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,12,4,128,1,float16,fp8,0,0.034927998979886375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,12,128,1,float16,fp8,0,0.02977066735426585
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,12,128,1,float16,float16,0,0.029370665550231934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,1,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,1,128,1,float16,fp8,0,0.027717334528764088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,12,128,1,fp8,fp8,0,0.02759466568628947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,1,128,1,fp8,fp8,0,0.026000000536441803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,2,128,1,float16,float16,0,0.028202667832374573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,2,128,1,float16,fp8,0,0.02804800122976303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,2,128,1,fp8,fp8,0,0.026906666656335194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,4,128,1,float16,float16,0,0.02847466617822647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,4,128,1,fp8,fp8,0,0.027855999767780304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,12,4,128,1,float16,fp8,0,0.028944000601768494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,12,128,1,float16,fp8,0,0.025066666305065155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,1,128,1,float16,float16,0,0.02407466620206833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,12,128,1,fp8,fp8,0,0.023770667612552643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,12,128,1,float16,float16,0,0.02476799984773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,1,128,1,float16,fp8,0,0.024480000138282776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,1,128,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,2,128,1,float16,float16,0,0.025034666061401367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,2,128,1,float16,fp8,0,0.024773334463437397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,2,128,1,fp8,fp8,0,0.022848000129063923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,4,128,1,float16,float16,0,0.024501333634058636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,4,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,12,4,128,1,fp8,fp8,0,0.023584000766277313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,12,1,128,1,fp8,fp8,0,0.46987199783325195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,12,1,128,1,float16,float16,0,0.49172266324361164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,12,2,128,1,float16,float16,0,0.5063253243764242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,12,1,128,1,float16,fp8,0,0.49084266026814777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,12,2,128,1,float16,fp8,0,0.5018506844838461
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,12,2,128,1,fp8,fp8,0,0.514517347017924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,12,4,128,1,float16,float16,0,0.5486453374226888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,1,128,1,float16,float16,0,0.2569813330968221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,1,128,1,float16,fp8,0,0.2586826682090759
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,12,4,128,1,float16,fp8,0,0.5440106789271036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,1,128,1,fp8,fp8,0,0.24612800280253092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,12,128,1,float16,float16,0,0.31752000252405804
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,12,128,1,float16,fp8,0,0.30980799595514935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,12,4,128,1,fp8,fp8,0,0.5724213520685831
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,12,128,1,fp8,fp8,0,0.30348267157872516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,2,128,1,float16,float16,0,0.26429333289464313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,2,128,1,float16,fp8,0,0.26182933648427326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,2,128,1,fp8,fp8,0,0.2645813425381978
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,4,128,1,float16,float16,0,0.28151466449101764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,4,128,1,float16,fp8,0,0.2759840091069539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,1,128,1,float16,float16,0,0.13984533150990805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,12,128,1,float16,float16,0,0.17015999555587769
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,12,4,128,1,fp8,fp8,0,0.2971893350283305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,1,128,1,float16,fp8,0,0.139984001715978
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,12,128,1,fp8,fp8,0,0.15871999661127725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,12,128,1,float16,fp8,0,0.16595733165740967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,1,128,1,fp8,fp8,0,0.1344586710135142
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,2,128,1,float16,float16,0,0.14326933026313782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,2,128,1,float16,fp8,0,0.14201600352923074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,2,128,1,fp8,fp8,0,0.14114666978518167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,4,128,1,float16,float16,0,0.15125866731007895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,4,128,1,float16,fp8,0,0.14893333117167154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,12,4,128,1,fp8,fp8,0,0.15640532970428467
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,12,128,1,float16,fp8,0,0.10008533795674641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,12,128,1,fp8,fp8,0,0.08891733487447102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,1,128,1,float16,float16,0,0.07943999767303467
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,1,128,1,float16,fp8,0,0.07996800045172374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,1,128,1,fp8,fp8,0,0.07598933577537537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,12,128,1,float16,float16,0,0.10175466537475586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,2,128,1,float16,float16,0,0.08158933122952779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,2,128,1,float16,fp8,0,0.08109333117802937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,2,128,1,fp8,fp8,0,0.08011200030644734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,4,128,1,float16,float16,0,0.08643200000127156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,4,128,1,float16,fp8,0,0.0855519970258077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,12,4,128,1,fp8,fp8,0,0.08570133646329244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,12,128,1,float16,float16,0,0.05532266696294149
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,1,128,1,float16,float16,0,0.04644800225893656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,1,128,1,float16,fp8,0,0.04666133224964142
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,1,128,1,fp8,fp8,0,0.043375998735427856
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,12,128,1,float16,fp8,0,0.05500266452630361
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,12,128,1,fp8,fp8,0,0.05332799752553304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,2,128,1,float16,float16,0,0.047450666626294456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,2,128,1,fp8,fp8,0,0.04525866607824961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,2,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,4,128,1,float16,float16,0,0.04980800052483877
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,4,128,1,float16,fp8,0,0.04958933095137278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,12,4,128,1,fp8,fp8,0,0.04966933528582255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,1,128,1,float16,fp8,0,0.0330826664964358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,1,128,1,float16,float16,0,0.03268799930810928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,12,128,1,float16,float16,0,0.036117332677046456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,12,128,1,float16,fp8,0,0.03481066723664602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,12,128,1,fp8,fp8,0,0.03349333256483078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,1,128,1,fp8,fp8,0,0.030320001145203907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,2,128,1,float16,float16,0,0.03299200038115183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,2,128,1,fp8,fp8,0,0.031445334355036415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,2,128,1,float16,fp8,0,0.033013333876927696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,4,128,1,float16,float16,0,0.03379199902216593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,1,128,1,float16,float16,0,0.024618667860825855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,12,128,1,float16,float16,0,0.02622933437426885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,12,128,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,12,128,1,float16,fp8,0,0.026501332720120747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,4,128,1,float16,fp8,0,0.033930666744709015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,1,128,1,float16,fp8,0,0.024666666984558105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,12,4,128,1,fp8,fp8,0,0.03253333270549774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,1,128,1,fp8,fp8,0,0.02363733450571696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,2,128,1,float16,float16,0,0.024698667228221893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,2,128,1,float16,fp8,0,0.02465066562096278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,2,128,1,fp8,fp8,0,0.023562667270501454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,4,128,1,fp8,fp8,0,0.024842667082945507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,4,128,1,float16,float16,0,0.025360000630219776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,12,128,1,float16,fp8,0,0.02202133337656657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,12,128,1,float16,float16,0,0.021738665799299877
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,12,4,128,1,float16,fp8,0,0.02565866708755493
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,12,128,1,fp8,fp8,0,0.020047999918460846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,1,128,1,float16,float16,0,0.021007999777793884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,1,128,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,1,128,1,fp8,fp8,0,0.01995733380317688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,2,128,1,float16,float16,0,0.02126399924357732
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,2,128,1,float16,fp8,0,0.021583999196688335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,4,128,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,12,128,1,float16,float16,0,0.020714666694402695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,4,128,1,fp8,fp8,0,0.020175999651352566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,12,128,1,float16,fp8,0,0.02027733375628789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,12,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,1,128,1,float16,float16,0,0.020549333343903225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,2,128,1,fp8,fp8,0,0.02025066688656807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,12,4,128,1,float16,float16,0,0.021733333667119343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,1,128,1,float16,fp8,0,0.020703999946514767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,1,128,1,fp8,fp8,0,0.019674666225910187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,2,128,1,float16,float16,0,0.020288000504175823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,2,128,1,float16,fp8,0,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,2,128,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,4,128,1,float16,float16,0,0.020560000091791153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,4,128,1,float16,fp8,0,0.020874666670958202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,12,4,128,1,fp8,fp8,0,0.01972266659140587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,12,1,128,1,float16,float16,0,0.23873066902160645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,12,1,128,1,float16,fp8,0,0.23985066016515097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,12,1,128,1,fp8,fp8,0,0.2456106742223104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,12,2,128,1,float16,float16,0,0.2450453241666158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,12,2,128,1,float16,fp8,0,0.2427519957224528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,12,2,128,1,fp8,fp8,0,0.2656586567560832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,12,4,128,1,float16,fp8,0,0.25780800978342694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,12,4,128,1,float16,float16,0,0.2609600027402242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,1,128,1,float16,float16,0,0.13176533579826355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,1,128,1,float16,fp8,0,0.1338933308919271
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,12,128,1,float16,fp8,0,0.16461867094039917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,12,128,1,float16,float16,0,0.1693920095761617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,12,4,128,1,fp8,fp8,0,0.2993866602579753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,12,128,1,fp8,fp8,0,0.15963733196258545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,2,128,1,float16,fp8,0,0.13450666268666586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,2,128,1,float16,float16,0,0.13422933220863342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,2,128,1,fp8,fp8,0,0.14270933469136557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,1,128,1,fp8,fp8,0,0.1342026690642039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,4,128,1,float16,float16,0,0.14382933576901755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,4,128,1,float16,fp8,0,0.14110400279362997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,12,128,1,float16,float16,0,0.09673600395520528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,12,4,128,1,fp8,fp8,0,0.1555466651916504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,1,128,1,float16,float16,0,0.07553066809972127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,1,128,1,float16,fp8,0,0.075914666056633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,12,128,1,fp8,fp8,0,0.0902346670627594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,12,128,1,float16,fp8,0,0.09150933225949605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,1,128,1,fp8,fp8,0,0.07551999886830647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,2,128,1,float16,fp8,0,0.07737599809964497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,2,128,1,float16,float16,0,0.07689066727956136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,2,128,1,fp8,fp8,0,0.07977599898974101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,4,128,1,float16,float16,0,0.08115733166535695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,12,128,1,float16,float16,0,0.053472002347310386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,1,128,1,float16,float16,0,0.04451199869314829
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,1,128,1,float16,fp8,0,0.04458666841189066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,12,128,1,fp8,fp8,0,0.05172266562779745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,4,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,12,4,128,1,fp8,fp8,0,0.08534399668375652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,1,128,1,fp8,fp8,0,0.043824002146720886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,12,128,1,float16,fp8,0,0.05216533442338308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,2,128,1,float16,float16,0,0.04514666895071665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,2,128,1,float16,fp8,0,0.04524266719818115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,2,128,1,fp8,fp8,0,0.04479999840259552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,4,128,1,float16,float16,0,0.04692799846331278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,12,128,1,float16,float16,0,0.03433600068092346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,4,128,1,float16,fp8,0,0.04762133459250132
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,12,4,128,1,fp8,fp8,0,0.04927466809749603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,1,128,1,float16,float16,0,0.03173866619666418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,1,128,1,float16,fp8,0,0.03170666595300039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,12,128,1,fp8,fp8,0,0.033013333876927696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,12,128,1,float16,fp8,0,0.034272000193595886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,1,128,1,fp8,fp8,0,0.03086400032043457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,2,128,1,float16,float16,0,0.03209600100914637
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,4,128,1,float16,fp8,0,0.032831999162832894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,4,128,1,float16,float16,0,0.033146666983763375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,4,128,1,fp8,fp8,0,0.03251733382542928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,2,128,1,fp8,fp8,0,0.030906667311986286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,12,2,128,1,float16,fp8,0,0.032325332363446556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,12,128,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,12,128,1,fp8,fp8,0,0.024357333779335022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,1,128,1,float16,float16,0,0.0235359991590182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,12,128,1,float16,float16,0,0.024847999215126038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,1,128,1,float16,fp8,0,0.0242399995525678
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,1,128,1,fp8,fp8,0,0.022821334501107533
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,2,128,1,fp8,fp8,0,0.02350933353106181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,2,128,1,float16,float16,0,0.023887999355793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,4,128,1,float16,fp8,0,0.02474133421977361
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,4,128,1,float16,float16,0,0.02426133304834366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,2,128,1,float16,fp8,0,0.023936000963052113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,12,128,1,float16,float16,0,0.020319999506076176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,12,128,1,float16,fp8,0,0.020143999407688778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,12,4,128,1,fp8,fp8,0,0.024288001159826916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,1,128,1,float16,fp8,0,0.019738666713237762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,1,128,1,fp8,fp8,0,0.019962667177120846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,12,128,1,fp8,fp8,0,0.01995733380317688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,1,128,1,float16,float16,0,0.020037333170572918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,2,128,1,float16,fp8,0,0.020202666521072388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,2,128,1,fp8,fp8,0,0.019744000087181728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,2,128,1,float16,float16,0,0.019973333925008774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,4,128,1,float16,fp8,0,0.020362666497627895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,12,128,1,float16,float16,0,0.018885333091020584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,4,128,1,fp8,fp8,0,0.020202666521072388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,12,4,128,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,12,128,1,fp8,fp8,0,0.01942933350801468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,1,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,12,128,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,1,128,1,float16,fp8,0,0.01970133309563001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,1,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,2,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,2,128,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,4,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,4,128,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,4,128,1,fp8,fp8,0,0.01953599974513054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,12,2,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,12,128,1,float16,fp8,0,0.01874133323629697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,12,128,1,fp8,fp8,0,0.01830400029818217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,1,128,1,float16,float16,0,0.018751999984184902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,1,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,1,128,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,12,128,1,float16,float16,0,0.01828266680240631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,2,128,1,float16,float16,0,0.018837332725524902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,2,128,1,float16,fp8,0,0.01889066646496455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,2,128,1,fp8,fp8,0,0.01869333287080129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,4,128,1,float16,float16,0,0.018522666146357853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,4,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,12,4,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,12,1,128,1,float16,float16,0,0.16124266386032104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,12,1,128,1,float16,fp8,0,0.1618880033493042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,12,2,128,1,float16,fp8,0,0.16154666741689047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,12,2,128,1,fp8,fp8,0,0.18278400103251138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,12,4,128,1,float16,fp8,0,0.16804800430933634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,12,4,128,1,float16,float16,0,0.16848532358805338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,12,1,128,1,fp8,fp8,0,0.17621866861979166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,12,2,128,1,float16,float16,0,0.16130666931470236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,12,4,128,1,fp8,fp8,0,0.19898132483164468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,1,128,1,float16,float16,0,0.08841066559155782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,1,128,1,float16,fp8,0,0.08865066369374593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,12,128,1,float16,fp8,0,0.09450667103131612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,12,128,1,fp8,fp8,0,0.11014933387438457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,12,128,1,float16,float16,0,0.09950400392214458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,2,128,1,float16,float16,0,0.09008000294367473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,1,128,1,fp8,fp8,0,0.09631466865539551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,2,128,1,float16,fp8,0,0.08964799841245015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,2,128,1,fp8,fp8,0,0.10116799672444661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,4,128,1,float16,float16,0,0.09319466352462769
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,4,128,1,float16,fp8,0,0.09284266829490662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,12,4,128,1,fp8,fp8,0,0.10631466905275981
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,1,128,1,float16,float16,0,0.0503359983364741
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,12,128,1,float16,float16,0,0.056661332647005715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,1,128,1,float16,fp8,0,0.050757333636283875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,12,128,1,float16,fp8,0,0.055173332492510475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,2,128,1,float16,float16,0,0.05115733544031779
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,12,128,1,fp8,fp8,0,0.0629013329744339
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,1,128,1,fp8,fp8,0,0.05482666691144308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,2,128,1,float16,fp8,0,0.051402668158213295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,2,128,1,fp8,fp8,0,0.05550399919350942
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,4,128,1,float16,fp8,0,0.05366399884223938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,12,128,1,float16,float16,0,0.033071999748547874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,12,128,1,fp8,fp8,0,0.03682666768630346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,4,128,1,float16,float16,0,0.05383466680844625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,1,128,1,float16,float16,0,0.03233599911133448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,12,4,128,1,fp8,fp8,0,0.059434667229652405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,1,128,1,float16,fp8,0,0.03233066697915395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,12,128,1,float16,fp8,0,0.03286933402220408
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,1,128,1,fp8,fp8,0,0.03425066669782003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,2,128,1,float16,float16,0,0.03291733314593633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,2,128,1,float16,fp8,0,0.03303466737270355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,12,128,1,fp8,fp8,0,0.028416000306606293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,4,128,1,float16,float16,0,0.03331200033426285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,2,128,1,fp8,fp8,0,0.03483733286460241
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,4,128,1,fp8,fp8,0,0.036320000886917114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,1,128,1,float16,fp8,0,0.025973332424958546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,12,4,128,1,float16,fp8,0,0.033674667278925575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,1,128,1,float16,float16,0,0.025813333690166473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,12,128,1,float16,float16,0,0.026704000929991405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,12,128,1,float16,fp8,0,0.027002667387326557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,1,128,1,fp8,fp8,0,0.027210667729377747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,2,128,1,float16,fp8,0,0.02588266630967458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,2,128,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,2,128,1,float16,float16,0,0.026021334032217663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,4,128,1,float16,fp8,0,0.027114666998386383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,1,128,1,float16,float16,0,0.018687999496857326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,12,128,1,float16,fp8,0,0.01950399950146675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,4,128,1,float16,float16,0,0.02644266684850057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,12,128,1,fp8,fp8,0,0.020442667106787365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,12,4,128,1,fp8,fp8,0,0.02863999952872594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,12,128,1,float16,float16,0,0.019871999820073444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,2,128,1,float16,float16,0,0.018885333091020584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,1,128,1,fp8,fp8,0,0.020293333878119785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,1,128,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,2,128,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,12,128,1,float16,float16,0,0.01773333301146825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,4,128,1,float16,float16,0,0.01934933289885521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,2,128,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,12,128,1,float16,fp8,0,0.018426666657129925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,1,128,1,float16,fp8,0,0.01820266619324684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,4,128,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,12,4,128,1,fp8,fp8,0,0.020319999506076176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,1,128,1,float16,float16,0,0.01823466643691063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,1,128,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,12,128,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,2,128,1,fp8,fp8,0,0.018640000373125076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,2,128,1,float16,fp8,0,0.018485333770513535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,2,128,1,float16,float16,0,0.017738666385412216
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,4,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,12,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,12,128,1,fp8,fp8,0,0.01836799954374631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,4,128,1,float16,float16,0,0.01821333294113477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,12,4,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,1,128,1,float16,float16,0,0.017312000195185345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,12,128,1,float16,fp8,0,0.017605333278576534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,2,128,1,float16,fp8,0,0.01806933308641116
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,1,128,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,2,128,1,float16,float16,0,0.01718933383623759
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,1,128,1,float16,fp8,0,0.017722666263580322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,4,128,1,float16,fp8,0,0.017717332889636356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,12,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,2,128,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,4,128,1,float16,float16,0,0.01773333301146825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,12,4,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,12,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,12,128,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,1,128,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,1,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,1,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,4,128,1,fp8,fp8,0,0.01814933369557063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,4,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,4,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,2,128,1,fp8,fp8,0,0.01855466639002164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,2,128,1,float16,fp8,0,0.017781333376963932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,12,2,128,1,float16,float16,0,0.017423999806245167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,12,1,128,1,float16,float16,0,0.11035733421643575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,12,1,128,1,fp8,fp8,0,0.1437493364016215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,12,1,128,1,float16,fp8,0,0.11149332920710246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,12,2,128,1,float16,float16,0,0.11351466178894043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,12,2,128,1,float16,fp8,0,0.11386133233706157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,12,2,128,1,fp8,fp8,0,0.14789866407712302
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,12,4,128,1,fp8,fp8,0,0.15294399857521057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,12,128,1,float16,float16,0,0.06816000243028005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,12,4,128,1,float16,float16,0,0.11884799599647522
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,1,128,1,float16,float16,0,0.060602664947509766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,12,4,128,1,float16,fp8,0,0.11732266346613567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,12,128,1,fp8,fp8,0,0.0869653324286143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,12,128,1,float16,fp8,0,0.0663679987192154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,2,128,1,float16,float16,0,0.06182399888833364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,1,128,1,fp8,fp8,0,0.07852266728878021
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,1,128,1,float16,fp8,0,0.06151466568311056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,2,128,1,fp8,fp8,0,0.08050666749477386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,4,128,1,float16,fp8,0,0.06417066852251689
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,4,128,1,float16,float16,0,0.06386133531729381
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,12,128,1,float16,float16,0,0.03862933317820231
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,2,128,1,float16,fp8,0,0.0627040018637975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,12,4,128,1,fp8,fp8,0,0.08346666892369588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,1,128,1,float16,float16,0,0.03773866593837738
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,12,128,1,float16,fp8,0,0.03865066667397817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,1,128,1,float16,fp8,0,0.03788266579310099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,2,128,1,float16,float16,0,0.03805333375930786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,1,128,1,fp8,fp8,0,0.045925334095954895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,12,128,1,fp8,fp8,0,0.04966400067011515
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,12,128,1,float16,float16,0,0.02788266787926356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,4,128,1,float16,float16,0,0.03897066662708918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,2,128,1,float16,fp8,0,0.03822399924198786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,4,128,1,fp8,fp8,0,0.0487360010544459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,4,128,1,float16,fp8,0,0.03916800022125244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,12,2,128,1,fp8,fp8,0,0.046767999728520714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,1,128,1,float16,float16,0,0.027301333844661713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,12,128,1,float16,fp8,0,0.027866666515668232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,12,128,1,fp8,fp8,0,0.03268799930810928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,1,128,1,float16,fp8,0,0.027029333015282948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,2,128,1,float16,float16,0,0.02752533306678136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,2,128,1,float16,fp8,0,0.02752000093460083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,4,128,1,float16,float16,0,0.02804800122976303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,2,128,1,fp8,fp8,0,0.031370667119820915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,1,128,1,fp8,fp8,0,0.03158933420976003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,4,128,1,float16,fp8,0,0.028058665494124096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,12,128,1,float16,float16,0,0.02107733239730199
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,1,128,1,float16,float16,0,0.020938667158285778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,12,128,1,float16,fp8,0,0.02146133283774058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,12,4,128,1,fp8,fp8,0,0.03331733246644338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,12,128,1,fp8,fp8,0,0.024277334411938984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,1,128,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,2,128,1,fp8,fp8,0,0.023669332265853882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,2,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,4,128,1,float16,float16,0,0.021365332106749218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,1,128,1,fp8,fp8,0,0.024138666689395905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,2,128,1,float16,float16,0,0.021205333371957142
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,4,128,1,fp8,fp8,0,0.02420266717672348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,12,128,1,float16,fp8,0,0.017909333109855652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,12,4,128,1,float16,fp8,0,0.0220320001244545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,12,128,1,float16,float16,0,0.017525333911180496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,12,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,1,128,1,float16,float16,0,0.01743999992807706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,1,128,1,float16,fp8,0,0.017802666872739792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,2,128,1,float16,fp8,0,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,2,128,1,float16,float16,0,0.017498667041460674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,1,128,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,4,128,1,float16,fp8,0,0.018085333208243053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,4,128,1,float16,float16,0,0.017509333789348602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,4,128,1,fp8,fp8,0,0.01953599974513054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,12,2,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,12,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,12,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,12,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,1,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,1,128,1,float16,float16,0,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,2,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,2,128,1,fp8,fp8,0,0.01859733338157336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,1,128,1,fp8,fp8,0,0.018122666825850803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,4,128,1,float16,float16,0,0.017221332838137943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,4,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,2,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,12,128,1,fp8,fp8,0,0.017877332866191864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,12,4,128,1,fp8,fp8,0,0.01855466639002164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,12,128,1,float16,fp8,0,0.01647466669480006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,1,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,1,128,1,float16,float16,0,0.016597333053747814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,12,128,1,float16,float16,0,0.016197333733240765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,1,128,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,2,128,1,fp8,fp8,0,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,4,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,2,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,4,128,1,float16,float16,0,0.01637866720557213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,12,4,128,1,fp8,fp8,0,0.018458666900793713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,12,128,1,float16,fp8,0,0.016496000190575916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,12,128,1,float16,float16,0,0.01600533351302147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,12,128,1,fp8,fp8,0,0.017759999881188076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,1,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,1,128,1,float16,float16,0,0.01628799984852473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,2,128,1,float16,float16,0,0.01653333380818367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,1,128,1,fp8,fp8,0,0.01851733277241389
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,2,128,1,float16,fp8,0,0.016442666451136272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,4,128,1,float16,fp8,0,0.01658133293191592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,2,128,1,fp8,fp8,0,0.018085333208243053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,4,128,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,12,4,128,1,float16,float16,0,0.01632000009218852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,12,1,128,1,float16,float16,0,0.09392533699671428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,12,1,128,1,float16,fp8,0,0.09468799829483032
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,12,2,128,1,float16,fp8,0,0.09499200185139973
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,12,2,128,1,fp8,fp8,0,0.12785599629084268
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,12,1,128,1,fp8,fp8,0,0.12593066692352295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,12,2,128,1,float16,float16,0,0.09506133198738098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,12,4,128,1,float16,float16,0,0.09844799836476643
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,12,4,128,1,fp8,fp8,0,0.1317813297112783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,12,4,128,1,float16,fp8,0,0.09750399986902873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,12,128,1,float16,float16,0,0.05273066461086273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,12,128,1,float16,fp8,0,0.0521066685517629
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,12,128,1,fp8,fp8,0,0.07267199953397115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,1,128,1,float16,fp8,0,0.05426133175690969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,1,128,1,fp8,fp8,0,0.07030933101971944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,1,128,1,float16,float16,0,0.05417066812515259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,2,128,1,float16,float16,0,0.05434666574001312
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,2,128,1,float16,fp8,0,0.05444799860318502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,2,128,1,fp8,fp8,0,0.07134933272997539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,4,128,1,float16,float16,0,0.05499733487764994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,4,128,1,float16,fp8,0,0.05558399856090546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,12,128,1,float16,float16,0,0.03449599941571554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,12,128,1,fp8,fp8,0,0.04437333345413208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,1,128,1,fp8,fp8,0,0.043280000487963356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,1,128,1,float16,fp8,0,0.035504000882307686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,12,128,1,float16,fp8,0,0.034927998979886375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,12,4,128,1,fp8,fp8,0,0.07205866773923238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,2,128,1,float16,float16,0,0.035301332672437034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,1,128,1,float16,float16,0,0.03487999985615412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,2,128,1,float16,fp8,0,0.036661334335803986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,4,128,1,float16,float16,0,0.03605866680542628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,2,128,1,fp8,fp8,0,0.043765331308046974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,4,128,1,float16,fp8,0,0.03633599976698557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,12,128,1,float16,fp8,0,0.02403733382622401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,12,4,128,1,fp8,fp8,0,0.045237332582473755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,12,128,1,fp8,fp8,0,0.028223998844623566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,12,128,1,float16,float16,0,0.02351466566324234
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,1,128,1,float16,fp8,0,0.02437866727511088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,1,128,1,float16,float16,0,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,1,128,1,fp8,fp8,0,0.02792533238728841
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,2,128,1,float16,float16,0,0.024266667664051056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,2,128,1,float16,fp8,0,0.02420799930890401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,4,128,1,fp8,fp8,0,0.028714666763941448
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,2,128,1,fp8,fp8,0,0.028543998797734577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,1,128,1,float16,float16,0,0.020506666352351505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,12,128,1,fp8,fp8,0,0.02365333338578542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,12,128,1,float16,fp8,0,0.02054399996995926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,4,128,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,12,4,128,1,float16,fp8,0,0.024618667860825855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,12,128,1,float16,float16,0,0.0201706662774086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,1,128,1,float16,fp8,0,0.02060266708334287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,2,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,2,128,1,float16,fp8,0,0.020608000457286835
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,2,128,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,1,128,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,4,128,1,float16,float16,0,0.020560000091791153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,12,128,1,float16,float16,0,0.01648533344268799
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,12,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,4,128,1,float16,fp8,0,0.020442667106787365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,12,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,1,128,1,float16,float16,0,0.01669866715868314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,2,128,1,float16,float16,0,0.016789333273967106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,12,4,128,1,fp8,fp8,0,0.023605334262053173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,1,128,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,1,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,2,128,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,4,128,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,4,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,4,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,12,2,128,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,12,128,1,float16,float16,0,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,12,128,1,float16,fp8,0,0.01643199970324834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,1,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,1,128,1,float16,float16,0,0.016528000434239704
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,1,128,1,fp8,fp8,0,0.018138666947682697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,2,128,1,float16,float16,0,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,12,128,1,fp8,fp8,0,0.018378666291634243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,2,128,1,fp8,fp8,0,0.01825599993268649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,4,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,4,128,1,float16,float16,0,0.01629866659641266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,2,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,12,4,128,1,fp8,fp8,0,0.018165333817402523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,12,128,1,float16,float16,0,0.01598400001724561
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,12,128,1,float16,fp8,0,0.016085332880417507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,1,128,1,float16,float16,0,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,1,128,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,12,128,1,fp8,fp8,0,0.018378666291634243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,1,128,1,fp8,fp8,0,0.018053332964579265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,2,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,2,128,1,fp8,fp8,0,0.0179626668492953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,4,128,1,float16,float16,0,0.01600533351302147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,4,128,1,fp8,fp8,0,0.01851733277241389
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,2,128,1,float16,float16,0,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,12,4,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,12,128,1,float16,fp8,0,0.01628799984852473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,12,128,1,fp8,fp8,0,0.01802666609485944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,12,128,1,float16,float16,0,0.01587733378012975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,1,128,1,float16,float16,0,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,1,128,1,float16,fp8,0,0.016544000556071598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,1,128,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,2,128,1,fp8,fp8,0,0.018405333161354065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,2,128,1,float16,fp8,0,0.016442666451136272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,4,128,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,2,128,1,float16,float16,0,0.01597333326935768
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,4,128,1,fp8,fp8,0,0.018181333939234417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,12,4,128,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,128,1,float16,float16,0,8.34709358215332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,128,1,float16,fp8,0,8.365850448608398
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,128,1,float16,fp8,0,8.457226435343424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,128,1,fp8,fp8,0,5.430986404418945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,128,1,float16,float16,0,8.248762766520182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,128,1,fp8,fp8,0,5.508218765258789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,128,1,float16,float16,0,8.513247807820639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,128,1,float16,fp8,0,8.444437026977539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,128,1,float16,float16,0,4.198896090189616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,128,1,float16,float16,0,3.98413880666097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,128,1,float16,fp8,0,4.176229476928711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,128,1,fp8,fp8,0,2.7283786137898765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,128,1,float16,fp8,0,4.000303904215495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,128,1,fp8,fp8,0,5.430543899536133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,128,1,float16,float16,0,4.008576075236003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,128,1,fp8,fp8,0,2.7356961568196616
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,128,1,float16,fp8,0,3.967583974202474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,128,1,fp8,fp8,0,2.744746526082357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,128,1,float16,float16,0,2.063408056894938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,128,1,float16,float16,0,4.005189259847005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,128,1,float16,fp8,0,4.101792017618815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,128,1,float16,fp8,0,2.0627466837565103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,128,1,fp8,fp8,0,2.7646506627400718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,128,1,float16,float16,0,2.0864906311035156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,128,1,float16,fp8,0,2.087461312611898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,128,1,fp8,fp8,0,1.4406347274780273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,128,1,fp8,fp8,0,1.4376853307088215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,128,1,float16,fp8,0,2.10098663965861
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,128,1,float16,float16,0,2.0906346638997397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,128,1,fp8,fp8,0,1.4435733159383137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,128,1,float16,float16,0,2.1219040552775064
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,128,1,float16,fp8,0,2.0852373441060386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,128,1,fp8,fp8,0,1.4535627365112305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,128,1,float16,float16,0,1.1322933038075764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,128,1,float16,fp8,0,1.1239893436431885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,128,1,fp8,fp8,0,0.7241013050079346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,128,1,float16,float16,0,1.1475253105163574
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,128,1,float16,fp8,0,1.1387893358866374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,128,1,fp8,fp8,0,0.7221759955088297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,128,1,float16,float16,0,1.147605339686076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,128,1,float16,fp8,0,1.145674705505371
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,128,1,float16,float16,0,1.1517866452534993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,128,1,fp8,fp8,0,0.72270401318868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,128,1,float16,fp8,0,1.1508159637451172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,128,1,fp8,fp8,0,0.7294987042744955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,128,1,float16,fp8,0,4.68777592976888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,128,1,fp8,fp8,0,3.1494614283243814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,128,1,float16,float16,0,4.763375918070476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,128,1,float16,float16,0,4.772645314534505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,128,1,fp8,fp8,0,3.1610132853190103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,128,1,float16,fp8,0,4.5857547124226885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,128,1,float16,float16,0,4.891482671101888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,128,1,float16,fp8,0,4.624784151713054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,128,1,float16,fp8,0,2.3335893948872886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,128,1,float16,float16,0,2.3533120155334473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,128,1,float16,float16,0,2.331226666768392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,128,1,fp8,fp8,0,1.6631147066752117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,128,1,float16,fp8,0,2.3433547019958496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,128,1,fp8,fp8,0,1.6488906542460124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,128,1,fp8,fp8,0,3.2010987599690757
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,128,1,float16,fp8,0,2.338805357615153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,128,1,float16,float16,0,2.361578623453776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,128,1,fp8,fp8,0,1.6434507369995117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,128,1,float16,float16,0,1.2549653053283691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,128,1,float16,float16,0,2.3636693954467773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,128,1,float16,fp8,0,2.3677013715108237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,128,1,float16,fp8,0,1.2521759668986003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,128,1,fp8,fp8,0,1.6601173082987468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,128,1,float16,float16,0,1.2535146872202556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,128,1,float16,fp8,0,1.2450666427612305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,128,1,fp8,fp8,0,0.8326506614685059
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,128,1,fp8,fp8,0,0.819381316502889
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,128,1,float16,float16,0,1.254421313603719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,128,1,float16,fp8,0,1.2524747053782146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,128,1,fp8,fp8,0,0.8260587056477865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,128,1,float16,float16,0,1.2615359624226887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,128,1,float16,fp8,0,1.2532587051391602
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,128,1,float16,float16,0,0.6059786478678385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,128,1,float16,float16,0,0.6075733502705892
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,128,1,float16,fp8,0,0.6077333291371664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,128,1,float16,fp8,0,0.604474663734436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,128,1,fp8,fp8,0,0.44522666931152344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,128,1,fp8,fp8,0,0.8348586559295654
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,128,1,fp8,fp8,0,0.441648006439209
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,128,1,float16,fp8,0,0.6029546658198038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,128,1,fp8,fp8,0,0.446176012357076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,128,1,float16,float16,0,0.6084426641464233
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,128,1,float16,float16,0,0.6137439807256063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,128,1,float16,fp8,0,0.6107626756032308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,128,1,fp8,fp8,0,0.44789334138234455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,128,1,float16,fp8,0,3.3921225865681968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,128,1,float16,float16,0,3.261727968851725
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,128,1,fp8,fp8,0,2.295423984527588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,128,1,float16,float16,0,3.3765014012654624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,128,1,float16,fp8,0,3.243055979410807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,128,1,fp8,fp8,0,2.3010667165120444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,128,1,float16,float16,0,3.4067999521891275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,128,1,float16,fp8,0,3.382826805114746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,128,1,float16,fp8,0,1.6935893694559734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,128,1,float16,float16,0,1.6908213297526042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,128,1,float16,fp8,0,1.6784799893697102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,128,1,float16,float16,0,1.682154655456543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,128,1,fp8,fp8,0,1.2217493057250977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,128,1,fp8,fp8,0,1.1986506779988606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,128,1,fp8,fp8,0,2.324853261311849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,128,1,float16,float16,0,1.6929012934366863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,128,1,fp8,fp8,0,1.2067840099334717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,128,1,float16,fp8,0,1.700991948445638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,128,1,float16,float16,0,0.9154187043507894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,128,1,float16,float16,0,1.7014452616373699
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,128,1,float16,fp8,0,0.9044319788614908
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,128,1,float16,fp8,0,0.911893367767334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,128,1,fp8,fp8,0,1.2183626492818196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,128,1,fp8,fp8,0,0.610922654469808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,128,1,float16,float16,0,0.9138773282368978
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,128,1,float16,fp8,0,1.690618673960368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,128,1,fp8,fp8,0,0.6037760178248087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,128,1,float16,float16,0,0.9146773020426432
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,128,1,fp8,fp8,0,0.6069066524505615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,128,1,float16,fp8,0,0.9115573565165201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,128,1,float16,fp8,0,0.9138720035552979
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,128,1,float16,float16,0,0.9234506289164225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,128,1,float16,float16,0,0.44660266240437824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,128,1,fp8,fp8,0,0.6147199869155884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,128,1,float16,fp8,0,0.4474879900614421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,128,1,float16,float16,0,0.44941333929697674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,128,1,fp8,fp8,0,0.3379253149032593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,128,1,float16,fp8,0,0.44464532534281415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,128,1,fp8,fp8,0,0.33241067330042523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,128,1,float16,float16,0,0.4522453149159749
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,128,1,float16,fp8,0,0.4466400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,128,1,fp8,fp8,0,0.33528534571329754
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,128,1,float16,float16,0,0.45495466391245526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,128,1,float16,fp8,0,0.4502986669540405
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,128,1,fp8,fp8,0,0.3385973374048869
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,128,1,float16,float16,0,4.4028746287028
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,128,1,float16,fp8,0,4.368058522542317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,128,1,fp8,fp8,0,3.030986785888672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,128,1,fp8,fp8,0,3.0529279708862305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,128,1,float16,fp8,0,4.243338584899902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,128,1,float16,float16,0,4.368618647257487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,128,1,float16,float16,0,4.506906509399414
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,128,1,float16,fp8,0,4.256869316101074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,128,1,float16,fp8,0,2.1538826624552407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,128,1,float16,float16,0,2.178442637125651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,128,1,float16,float16,0,2.1768693923950195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,128,1,fp8,fp8,0,1.5714613596598308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,128,1,float16,fp8,0,2.190890630086263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,128,1,float16,float16,0,2.185093402862549
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,128,1,fp8,fp8,0,1.6023146311442058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,128,1,fp8,fp8,0,3.0838133494059243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,128,1,fp8,fp8,0,1.5719839731852214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,128,1,float16,fp8,0,2.1601759592692056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,128,1,float16,float16,0,2.200511932373047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,128,1,float16,float16,0,1.1387413342793782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,128,1,float16,fp8,0,2.20525328318278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,128,1,fp8,fp8,0,1.5917812983194988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,128,1,float16,fp8,0,1.1505920092264812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,128,1,float16,float16,0,1.140565315882365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,128,1,fp8,fp8,0,0.850389321645101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,128,1,fp8,fp8,0,0.8236640294392904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,128,1,float16,fp8,0,1.1297067006429036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,128,1,float16,float16,0,1.1450506846110027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,128,1,float16,fp8,0,1.1443946361541748
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,128,1,fp8,fp8,0,0.8298986752827963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,128,1,float16,float16,0,1.160805304845174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,128,1,fp8,fp8,0,0.8393066724141439
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,128,1,float16,fp8,0,1.141424020131429
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,128,1,float16,float16,0,0.6229759852091471
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,128,1,float16,float16,0,0.6293919881184896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,128,1,float16,fp8,0,0.6239999930063883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,128,1,fp8,fp8,0,0.42838935057322186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,128,1,fp8,fp8,0,0.4187999963760376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,128,1,float16,fp8,0,0.6212106545766195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,128,1,float16,float16,0,0.6333706776301066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,128,1,fp8,fp8,0,0.41914665699005127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,128,1,float16,fp8,0,0.6254986524581909
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,128,1,float16,fp8,0,0.6320213476816813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,128,1,float16,float16,0,0.6360906759897867
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,128,1,fp8,fp8,0,0.42636799812316895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,128,1,float16,float16,0,0.30455466111501056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,128,1,float16,fp8,0,0.30082666873931885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,128,1,fp8,fp8,0,0.23825067281723022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,128,1,float16,float16,0,0.3020159999529521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,128,1,fp8,fp8,0,0.23105067014694214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,128,1,float16,float16,0,0.3038506706555684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,128,1,float16,fp8,0,0.3001386721928914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,128,1,fp8,fp8,0,0.23416000604629517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,128,1,float16,fp8,0,0.2993386586507161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,128,1,float16,fp8,0,0.3015413284301758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,128,1,float16,float16,0,0.30500266949335736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,128,1,fp8,fp8,0,0.23547732830047607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,128,1,float16,fp8,0,2.546917279561361
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,128,1,fp8,fp8,0,1.87337064743042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,128,1,float16,float16,0,2.565626621246338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,128,1,float16,float16,0,2.6066880226135254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,128,1,fp8,fp8,0,1.8849867184956868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,128,1,float16,fp8,0,2.5267252922058105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,128,1,float16,fp8,0,2.547877311706543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,128,1,float16,float16,0,2.564634641011556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,128,1,float16,float16,0,1.31440003712972
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,128,1,float16,fp8,0,1.2974080244700115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,128,1,float16,float16,0,1.320357322692871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,128,1,fp8,fp8,0,0.9732586542765299
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,128,1,fp8,fp8,0,1.006287972132365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,128,1,float16,fp8,0,1.3275466759999592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,128,1,fp8,fp8,0,1.9150080680847168
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,128,1,float16,float16,0,1.3158559799194336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,128,1,float16,fp8,0,1.3044533729553223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,128,1,fp8,fp8,0,0.9779253005981445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,128,1,float16,float16,0,0.7067626317342123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,128,1,float16,fp8,0,1.3142399787902832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,128,1,float16,fp8,0,0.706000010172526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,128,1,float16,float16,0,1.3282293478647869
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,128,1,float16,float16,0,0.7003200054168701
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,128,1,fp8,fp8,0,0.9940266609191895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,128,1,float16,fp8,0,0.6934879620869955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,128,1,fp8,fp8,0,0.5114880005518595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,128,1,fp8,fp8,0,0.4911839962005615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,128,1,float16,float16,0,0.7034239768981934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,128,1,fp8,fp8,0,0.49659732977549237
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,128,1,float16,fp8,0,0.6991786956787109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,128,1,float16,float16,0,0.7079573472340902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,128,1,float16,fp8,0,0.7069386641184489
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,128,1,float16,float16,0,0.34698132673899335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,128,1,fp8,fp8,0,0.506384015083313
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,128,1,float16,float16,0,0.3434773286183675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,128,1,float16,fp8,0,0.33903467655181885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,128,1,float16,fp8,0,0.34829334417978924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,128,1,fp8,fp8,0,0.27459200223286945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,128,1,float16,float16,0,0.3474133412043254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,128,1,fp8,fp8,0,0.2702186703681946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,128,1,float16,fp8,0,0.3402080138524373
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,128,1,fp8,fp8,0,0.2661973237991333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,128,1,float16,float16,0,0.34828801949818927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,128,1,float16,fp8,0,0.345360000928243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,128,1,float16,fp8,0,0.20225600401560465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,128,1,float16,float16,0,0.2025173306465149
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,128,1,fp8,fp8,0,0.2727839946746826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,128,1,float16,fp8,0,0.19818667570749918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,128,1,fp8,fp8,0,0.1630293329556783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,128,1,float16,float16,0,0.19814399878184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,128,1,fp8,fp8,0,0.1548640032609304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,128,1,float16,float16,0,0.1997599999109904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,128,1,float16,fp8,0,0.19977599382400513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,128,1,fp8,fp8,0,0.15656532843907675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,128,1,float16,fp8,0,0.20113599300384521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,128,1,float16,float16,0,0.203439990679423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,128,1,fp8,fp8,0,0.161189337571462
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,128,1,float16,fp8,0,2.444570700327555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,128,1,float16,float16,0,2.492448012034098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,128,1,fp8,fp8,0,1.918565273284912
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,128,1,float16,float16,0,2.5362879435221353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,128,1,float16,fp8,0,2.460106690724691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,128,1,fp8,fp8,0,1.931829293568929
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,128,1,float16,float16,0,2.553679943084717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,128,1,float16,fp8,0,2.4901013374328613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,128,1,float16,float16,0,1.2747519810994465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,128,1,float16,fp8,0,1.2525866826375325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,128,1,float16,fp8,0,1.2968053023020427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,128,1,fp8,fp8,0,0.9843680063883463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,128,1,fp8,fp8,0,1.0343999862670898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,128,1,float16,float16,0,1.2888800303141277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,128,1,fp8,fp8,0,1.9658293724060059
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,128,1,float16,fp8,0,1.2587786515553792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,128,1,float16,float16,0,1.286895990371704
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,128,1,fp8,fp8,0,0.990826686223348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,128,1,float16,float16,0,0.6687520345052084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,128,1,float16,float16,0,1.30348801612854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,128,1,float16,fp8,0,1.2820213635762532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,128,1,float16,fp8,0,0.6719679832458496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,128,1,float16,fp8,0,0.6576639811197916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,128,1,fp8,fp8,0,1.0129760106404622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,128,1,fp8,fp8,0,0.5441333452860514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,128,1,float16,float16,0,0.674186627070109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,128,1,fp8,fp8,0,0.5174826780954996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,128,1,float16,float16,0,0.6721920172373453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,128,1,float16,fp8,0,0.6624266703923544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,128,1,fp8,fp8,0,0.5208160082499186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,128,1,float16,fp8,0,0.6715253194173177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,128,1,fp8,fp8,0,0.5305973291397095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,128,1,float16,float16,0,0.3696959813435872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,128,1,float16,float16,0,0.675322691599528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,128,1,float16,float16,0,0.36745599905649823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,128,1,float16,fp8,0,0.3696639935175578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,128,1,float16,fp8,0,0.36158935228983563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,128,1,fp8,fp8,0,0.2830880085627238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,128,1,fp8,fp8,0,0.2656586567560832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,128,1,float16,float16,0,0.3686453501383464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,128,1,float16,fp8,0,0.36343467235565186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,128,1,fp8,fp8,0,0.26863465706507367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,128,1,float16,float16,0,0.3736480077107747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,128,1,fp8,fp8,0,0.2749813397725423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,128,1,float16,float16,0,0.18664532899856567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,128,1,float16,fp8,0,0.18477867046991983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,128,1,fp8,fp8,0,0.15584533413251242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,128,1,float16,fp8,0,0.36957331498463947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,128,1,fp8,fp8,0,0.14601066708564758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,128,1,float16,fp8,0,0.1763146718343099
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,128,1,float16,float16,0,0.17808000246683756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,128,1,float16,fp8,0,0.1787359913190206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,128,1,float16,float16,0,0.18122132619222006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,128,1,fp8,fp8,0,0.15054399768511453
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,128,1,float16,float16,0,0.18336532513300577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,128,1,fp8,fp8,0,0.15275733669598898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,128,1,float16,float16,0,0.10560533404350281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,128,1,float16,float16,0,0.10314666231473286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,128,1,fp8,fp8,0,0.0928106705347697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,128,1,float16,fp8,0,0.18076799313227335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,128,1,float16,fp8,0,0.10227200388908386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,128,1,float16,fp8,0,0.10539733370145161
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,128,1,fp8,fp8,0,0.08595200379689534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,128,1,float16,float16,0,0.10342933734258015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,128,1,fp8,fp8,0,0.08665066957473755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,128,1,float16,fp8,0,0.1023573378721873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,128,1,float16,float16,0,0.10393066207567851
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,128,1,fp8,fp8,0,0.08865599830945332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,128,1,float16,fp8,0,0.10331733028093974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,128,1,float16,fp8,0,1.5183946291605632
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,128,1,fp8,fp8,0,1.23581329981486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,128,1,float16,float16,0,1.5548693339029949
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,128,1,float16,float16,0,1.5680054028828938
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,128,1,float16,fp8,0,1.5334614117940266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,128,1,fp8,fp8,0,1.2537706693013508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,128,1,float16,float16,0,1.6013813018798828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,128,1,float16,fp8,0,1.5578400293986003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,128,1,float16,fp8,0,0.7847786744435629
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,128,1,float16,float16,0,0.7968586285909017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,128,1,fp8,fp8,0,0.6397653420766195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,128,1,float16,float16,0,0.82314133644104
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,128,1,fp8,fp8,0,0.6806613604227701
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,128,1,float16,fp8,0,0.806816021601359
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,128,1,fp8,fp8,0,1.2763786315917969
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,128,1,float16,fp8,0,0.7913280328114828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,128,1,fp8,fp8,0,0.6450719833374023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,128,1,float16,float16,0,0.8029867013295492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,128,1,float16,float16,0,0.8167306582132975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,128,1,float16,float16,0,0.42445866266886395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,128,1,fp8,fp8,0,0.6615893443425497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,128,1,float16,float16,0,0.43306132157643634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,128,1,float16,fp8,0,0.43026665846506756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,128,1,float16,fp8,0,0.8022933006286621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,128,1,float16,fp8,0,0.41787731647491455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,128,1,fp8,fp8,0,0.3543573220570882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,128,1,fp8,fp8,0,0.3255946636199951
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,128,1,float16,float16,0,0.42716264724731445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,128,1,float16,fp8,0,0.42136534055074054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,128,1,float16,float16,0,0.4318559964497884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,128,1,fp8,fp8,0,0.328490674495697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,128,1,float16,fp8,0,0.42841601371765137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,128,1,float16,float16,0,0.22315200169881186
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,128,1,float16,fp8,0,0.2220319906870524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,128,1,float16,float16,0,0.2116853396097819
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,128,1,fp8,fp8,0,0.3392639954884847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,128,1,float16,fp8,0,0.20838399728139242
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,128,1,float16,float16,0,0.21471466620763144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,128,1,fp8,fp8,0,0.18995734055836996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,128,1,float16,fp8,0,0.21073599656422934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,128,1,fp8,fp8,0,0.17652267217636108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,128,1,fp8,fp8,0,0.17946134010950723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,128,1,float16,float16,0,0.21781333287556967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,128,1,fp8,fp8,0,0.18331199884414673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,128,1,float16,float16,0,0.1271573305130005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,128,1,float16,fp8,0,0.12615467111269632
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,128,1,float16,fp8,0,0.11966400345166524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,128,1,float16,fp8,0,0.21527467171351114
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,128,1,fp8,fp8,0,0.10985599954922994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,128,1,float16,float16,0,0.12061867117881775
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,128,1,fp8,fp8,0,0.10021332899729411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,128,1,float16,float16,0,0.12248532970746358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,128,1,float16,fp8,0,0.1200213332970937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,128,1,float16,float16,0,0.12405866384506226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,128,1,fp8,fp8,0,0.1018986701965332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,128,1,fp8,fp8,0,0.10656533638636272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,128,1,float16,float16,0,0.07825066645940144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,128,1,float16,float16,0,0.07815999786059062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,128,1,float16,fp8,0,0.07875733574231465
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,128,1,fp8,fp8,0,0.06832000116507213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,128,1,float16,fp8,0,0.1230560044447581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,128,1,float16,fp8,0,0.07755733529726665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,128,1,fp8,fp8,0,0.06560533245404561
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,128,1,fp8,fp8,0,0.06509333352247874
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,128,1,float16,float16,0,0.0782773345708847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,128,1,float16,float16,0,0.07853333155314128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,128,1,float16,fp8,0,0.07801066835721333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,128,1,float16,fp8,0,0.07745066781838734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,128,1,fp8,fp8,0,0.06568533182144165
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,128,1,float16,float16,0,1.6621653238932292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,128,1,fp8,fp8,0,1.3817812601725261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,128,1,float16,fp8,0,1.5945760409037273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,128,1,fp8,fp8,0,1.3898560206095378
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,128,1,float16,float16,0,1.6761760711669922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,128,1,float16,fp8,0,1.6074825922648113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,128,1,float16,float16,0,1.6943999926249187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,128,1,float16,fp8,0,1.6335466702779133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,128,1,float16,float16,0,0.8364213307698568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,128,1,float16,fp8,0,0.8126346270243326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,128,1,float16,float16,0,0.879210631052653
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,128,1,fp8,fp8,0,0.7566773096720377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,128,1,float16,fp8,0,0.8501866658528646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,128,1,fp8,fp8,0,0.6990400155385336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,128,1,fp8,fp8,0,1.4213706652323406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,128,1,float16,float16,0,0.8432799975077311
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,128,1,float16,fp8,0,0.8225119908650717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,128,1,fp8,fp8,0,0.7088267008463541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,128,1,float16,float16,0,0.4336800177892049
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,128,1,float16,fp8,0,0.422757347424825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,128,1,float16,float16,0,0.8601386547088623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,128,1,fp8,fp8,0,0.3964426517486572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,128,1,float16,fp8,0,0.44732268651326496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,128,1,float16,fp8,0,0.8329386711120605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,128,1,float16,float16,0,0.45052266120910645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,128,1,fp8,fp8,0,0.7272586822509766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,128,1,fp8,fp8,0,0.3656853437423706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,128,1,float16,fp8,0,0.42876267433166504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,128,1,float16,float16,0,0.4357066551844279
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,128,1,fp8,fp8,0,0.3717546860376994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,128,1,float16,fp8,0,0.4346666733423869
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,128,1,float16,float16,0,0.4447733163833618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,128,1,fp8,fp8,0,0.37858132521311444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,128,1,float16,float16,0,0.24639999866485596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,128,1,float16,float16,0,0.23612266778945923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,128,1,float16,fp8,0,0.24450665712356567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,128,1,fp8,fp8,0,0.2072533369064331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,128,1,float16,fp8,0,0.2318613330523173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,128,1,fp8,fp8,0,0.187882661819458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,128,1,float16,float16,0,0.23870933055877686
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,128,1,float16,fp8,0,0.23280000686645508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,128,1,float16,fp8,0,0.23940799633661905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,128,1,float16,float16,0,0.24237332741419473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,128,1,float16,float16,0,0.12617599964141846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,128,1,fp8,fp8,0,0.19660266240437826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,128,1,fp8,fp8,0,0.19106133778889975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,128,1,float16,fp8,0,0.12560000022252402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,128,1,fp8,fp8,0,0.11652266979217529
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,128,1,float16,fp8,0,0.11665599544843037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,128,1,fp8,fp8,0,0.10472533106803894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,128,1,float16,float16,0,0.11974400281906128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,128,1,float16,fp8,0,0.11822932958602905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,128,1,float16,float16,0,0.11802666385968526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,128,1,float16,fp8,0,0.12143466869990031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,128,1,float16,float16,0,0.12224533160527547
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,128,1,fp8,fp8,0,0.10764799515406291
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,128,1,fp8,fp8,0,0.11097600062688191
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,128,1,float16,float16,0,0.07297599812348683
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,128,1,float16,fp8,0,0.06772266825040181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,128,1,float16,fp8,0,0.07228800157705943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,128,1,fp8,fp8,0,0.0689386675755183
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,128,1,fp8,fp8,0,0.06091733276844025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,128,1,float16,float16,0,0.06911466519037883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,128,1,float16,float16,0,0.06825600067774455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,128,1,float16,fp8,0,0.06771733363469441
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,128,1,fp8,fp8,0,0.061466669042905174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,128,1,float16,float16,0,0.06945066650708516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,128,1,fp8,fp8,0,0.06420266628265381
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,128,1,float16,float16,0,0.05748266478379568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,128,1,float16,fp8,0,0.057674666245778404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,128,1,fp8,fp8,0,0.04993600149949392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,128,1,float16,float16,0,0.0574239989121755
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,128,1,float16,fp8,0,0.05670933425426483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,128,1,float16,fp8,0,0.06868800024191539
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,128,1,fp8,fp8,0,0.048895999789237976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,128,1,float16,float16,0,0.05792533357938131
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,128,1,float16,fp8,0,0.05683733522891998
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,128,1,fp8,fp8,0,0.049471999208132424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,128,1,float16,fp8,0,0.05727999905745188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,128,1,fp8,fp8,0,0.04970133304595947
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,128,1,float16,float16,0,0.05751466751098633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,128,1,float16,fp8,0,1.0280640125274658
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,128,1,fp8,fp8,0,0.9225227038065592
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,128,1,float16,float16,0,1.0758612950642903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,128,1,float16,float16,0,1.0811573664347331
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,128,1,float16,fp8,0,1.0371519724527996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,128,1,fp8,fp8,0,0.9369760354359945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,128,1,float16,float16,0,1.1003413200378418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,128,1,float16,fp8,0,1.054202636082967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,128,1,float16,float16,0,0.5469333330790201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,128,1,float16,fp8,0,0.5311306715011597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,128,1,float16,float16,0,0.5773066679636637
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,128,1,fp8,fp8,0,0.5238080024719238
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,128,1,fp8,fp8,0,0.47491200764973956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,128,1,fp8,fp8,0,0.966655969619751
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,128,1,float16,fp8,0,0.5655680100123087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,128,1,fp8,fp8,0,0.4829920132954915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,128,1,float16,float16,0,0.5517760117848715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,128,1,float16,fp8,0,0.5358506838480631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,128,1,float16,float16,0,0.5640960137049357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,128,1,float16,float16,0,0.30508265892664593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,128,1,float16,fp8,0,0.5443626642227173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,128,1,fp8,fp8,0,0.49908800919850665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,128,1,float16,float16,0,0.2878986597061157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,128,1,float16,fp8,0,0.2827039957046509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,128,1,float16,fp8,0,0.3006826639175415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,128,1,fp8,fp8,0,0.2772480050722758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,128,1,fp8,fp8,0,0.24437334140141806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,128,1,float16,fp8,0,0.28545600175857544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,128,1,float16,float16,0,0.2929706573486328
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,128,1,fp8,fp8,0,0.24961066246032715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,128,1,float16,float16,0,0.2967093388239543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,128,1,float16,fp8,0,0.29077865680058795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,128,1,float16,float16,0,0.1477226714293162
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,128,1,float16,float16,0,0.16245866815249124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,128,1,float16,fp8,0,0.1612320045630137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,128,1,float16,fp8,0,0.14566399653752646
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,128,1,fp8,fp8,0,0.25775466362635296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,128,1,fp8,fp8,0,0.14658666650454202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,128,1,float16,float16,0,0.1497173309326172
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,128,1,float16,fp8,0,0.14774399995803833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,128,1,fp8,fp8,0,0.1344480017820994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,128,1,fp8,fp8,0,0.1327893336613973
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,128,1,float16,float16,0,0.1539520025253296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,128,1,float16,float16,0,0.08919466535250346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,128,1,float16,fp8,0,0.1511679987112681
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,128,1,fp8,fp8,0,0.1402346690495809
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,128,1,float16,float16,0,0.08140799899895985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,128,1,fp8,fp8,0,0.08682666222254436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,128,1,fp8,fp8,0,0.07401599983374278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,128,1,float16,float16,0,0.08242133259773254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,128,1,float16,fp8,0,0.08901333808898926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,128,1,float16,fp8,0,0.08120533327261607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,128,1,float16,fp8,0,0.08028799792130788
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,128,1,float16,float16,0,0.08505599697430928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,128,1,float16,float16,0,0.05328000088532766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,128,1,fp8,fp8,0,0.08235733211040497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,128,1,float16,fp8,0,0.0846560001373291
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,128,1,fp8,fp8,0,0.050554667909940086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,128,1,float16,fp8,0,0.053632001082102455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,128,1,fp8,fp8,0,0.07604266703128815
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,128,1,float16,float16,0,0.05209066470464071
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,128,1,float16,fp8,0,0.05111999809741974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,128,1,fp8,fp8,0,0.046853333711624146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,128,1,float16,float16,0,0.0525546669960022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,128,1,float16,fp8,0,0.05142933130264282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,128,1,fp8,fp8,0,0.04717866579691569
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,128,1,float16,float16,0,0.05268799761931101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,128,1,fp8,fp8,0,0.04825599988301595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,128,1,float16,float16,0,0.04740799963474274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,128,1,float16,fp8,0,0.05221333106358846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,128,1,float16,fp8,0,0.04720533390839895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,128,1,fp8,fp8,0,0.04177066683769226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,128,1,float16,float16,0,0.0468746672074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,128,1,float16,fp8,0,0.04658666749795278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,128,1,fp8,fp8,0,0.04107200105985006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,128,1,float16,fp8,0,0.046869332591692604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,128,1,float16,float16,0,0.04696533580621084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,128,1,float16,float16,0,0.04734933376312256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,128,1,float16,fp8,0,0.047024001677831016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,128,1,fp8,fp8,0,0.041002665956815086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,128,1,fp8,fp8,0,0.041493333876132965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,128,1,float16,float16,0,0.996346632639567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,128,1,fp8,fp8,0,0.8873279889424642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,128,1,float16,fp8,0,0.9910986423492432
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,128,1,fp8,fp8,0,0.9399466514587402
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,128,1,float16,fp8,0,1.0047252972920735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,128,1,float16,float16,0,1.0076639652252197
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,128,1,float16,float16,0,1.0495466391245525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,128,1,float16,fp8,0,1.0632266998291016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,128,1,float16,float16,0,0.5122613509496053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,128,1,float16,fp8,0,0.5114239851633707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,128,1,float16,float16,0,0.5824853181838989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,128,1,fp8,fp8,0,0.44977064927419025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,128,1,fp8,fp8,0,0.5317013263702393
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,128,1,float16,float16,0,0.5194186766942342
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,128,1,float16,fp8,0,0.5704319874445597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,128,1,float16,fp8,0,0.5168799956639608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,128,1,fp8,fp8,0,1.020693302154541
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,128,1,fp8,fp8,0,0.47359999020894367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,128,1,float16,float16,0,0.2717600067456563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,128,1,float16,float16,0,0.5335093339284261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,128,1,float16,float16,0,0.3041013280550639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,128,1,fp8,fp8,0,0.27567466100056964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,128,1,fp8,fp8,0,0.5247840086619059
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,128,1,float16,fp8,0,0.5307146708170573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,128,1,float16,fp8,0,0.268394668896993
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,128,1,fp8,fp8,0,0.23811199267705283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,128,1,float16,fp8,0,0.2976693312327067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,128,1,float16,float16,0,0.27351999282836914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,128,1,float16,fp8,0,0.2732586661974589
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,128,1,fp8,fp8,0,0.2464159925778707
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,128,1,float16,float16,0,0.2834986646970113
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,128,1,fp8,fp8,0,0.2711413304011027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,128,1,float16,float16,0,0.16510933637619019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,128,1,float16,fp8,0,0.28055999676386517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,128,1,float16,float16,0,0.149509330590566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,128,1,float16,fp8,0,0.14874666929244995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,128,1,fp8,fp8,0,0.1420799990495046
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,128,1,float16,fp8,0,0.16342399517695108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,128,1,fp8,fp8,0,0.12600533167521158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,128,1,float16,fp8,0,0.14964800079663595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,128,1,fp8,fp8,0,0.1299679974714915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,128,1,float16,float16,0,0.15092800060908
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,128,1,float16,float16,0,0.15757866700490317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,128,1,float16,fp8,0,0.1551359991232554
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,128,1,float16,fp8,0,0.08629866441090901
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,128,1,fp8,fp8,0,0.1405119995276133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,128,1,fp8,fp8,0,0.07946133116881053
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,128,1,fp8,fp8,0,0.07030933101971944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,128,1,float16,fp8,0,0.07745066781838734
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,128,1,float16,float16,0,0.07926933467388153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,128,1,float16,float16,0,0.08840533097585042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,128,1,float16,float16,0,0.07823466757933299
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,128,1,fp8,fp8,0,0.07260799904664357
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,128,1,float16,fp8,0,0.0791733314593633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,128,1,float16,float16,0,0.08183999856313069
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,128,1,float16,fp8,0,0.08206399778525035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,128,1,float16,float16,0,0.0506933331489563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,128,1,fp8,fp8,0,0.07633600135644276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,128,1,float16,float16,0,0.04508799811204275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,128,1,float16,fp8,0,0.04520000020662943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,128,1,fp8,fp8,0,0.04784533381462097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,128,1,float16,fp8,0,0.05003199974695841
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,128,1,fp8,fp8,0,0.04145599901676178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,128,1,float16,float16,0,0.045824001232783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,128,1,float16,fp8,0,0.0454773356517156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,128,1,float16,float16,0,0.04696000119050344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,128,1,fp8,fp8,0,0.04219200213750204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,128,1,float16,fp8,0,0.04701866706212362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,128,1,fp8,fp8,0,0.046037331223487854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,128,1,float16,float16,0,0.03679466744263967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,128,1,float16,fp8,0,0.03622400015592575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,128,1,float16,float16,0,0.034714666505654655
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,128,1,fp8,fp8,0,0.032586666444937386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,128,1,float16,fp8,0,0.03514133393764496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,128,1,fp8,fp8,0,0.031162666777769726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,128,1,float16,float16,0,0.0352960005402565
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,128,1,float16,float16,0,0.03559466699759165
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,128,1,float16,fp8,0,0.03525333354870478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,128,1,float16,fp8,0,0.03603200117746989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,128,1,fp8,fp8,0,0.03223466624816259
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,128,1,float16,float16,0,0.03179733455181122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,128,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,128,1,fp8,fp8,0,0.028778667251269024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,128,1,float16,float16,0,0.03221333275238673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,128,1,float16,float16,0,0.0322773332397143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,128,1,float16,fp8,0,0.032218667368094124
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,128,1,fp8,fp8,0,0.028207999964555103
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,128,1,float16,fp8,0,0.03183999905983607
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,128,1,float16,float16,0,0.032255999743938446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,128,1,float16,fp8,0,0.0322080006202062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,128,1,fp8,fp8,0,0.02829333394765854
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,128,1,float16,fp8,0,0.03241066634654999
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,128,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,8,1,128,1,float16,fp8,0,0.7723893324534098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,8,1,128,1,float16,float16,0,0.7755573590596517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,8,2,128,1,float16,float16,0,0.7919466495513916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,8,1,128,1,fp8,fp8,0,0.7353119850158691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,8,2,128,1,float16,fp8,0,0.7847359975179037
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,8,2,128,1,fp8,fp8,0,0.7916906674702963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,8,4,128,1,float16,fp8,0,0.828005313873291
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,8,4,128,1,float16,float16,0,0.8325866858164469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,1,128,1,float16,float16,0,0.39928531646728516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,1,128,1,fp8,fp8,0,0.37281068166097003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,1,128,1,float16,fp8,0,0.3981920083363851
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,2,128,1,float16,float16,0,0.40621332327524823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,8,128,1,float16,float16,0,0.47629332542419434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,8,128,1,float16,fp8,0,0.46541865666707355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,8,128,1,fp8,fp8,0,0.4502933422724406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,8,4,128,1,fp8,fp8,0,0.8690186341603597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,2,128,1,float16,fp8,0,0.4043733278910319
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,2,128,1,fp8,fp8,0,0.40141332149505615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,1,128,1,float16,float16,0,0.21091200908025107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,8,128,1,float16,float16,0,0.24783466259638467
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,8,128,1,fp8,fp8,0,0.2345973253250122
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,4,128,1,float16,float16,0,0.4243839979171753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,4,128,1,float16,fp8,0,0.4190719922383626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,1,128,1,float16,fp8,0,0.21052267154057822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,8,128,1,float16,fp8,0,0.2403786579767863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,8,4,128,1,fp8,fp8,0,0.4436426560084025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,1,128,1,fp8,fp8,0,0.1967839996019999
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,2,128,1,float16,fp8,0,0.21399466196695963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,2,128,1,float16,float16,0,0.21427732706069946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,2,128,1,fp8,fp8,0,0.20675732692082724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,4,128,1,float16,float16,0,0.22317866484324136
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,4,128,1,float16,fp8,0,0.2230293353398641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,8,128,1,float16,fp8,0,0.1322879989941915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,8,128,1,float16,float16,0,0.13482667009035745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,8,4,128,1,fp8,fp8,0,0.23197333017985025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,1,128,1,float16,float16,0,0.11719999710718791
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,1,128,1,fp8,fp8,0,0.10877333084742229
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,2,128,1,float16,float16,0,0.12010666728019714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,1,128,1,float16,fp8,0,0.11758400003115337
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,8,128,1,fp8,fp8,0,0.1255626678466797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,2,128,1,fp8,fp8,0,0.11264000336329143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,2,128,1,float16,fp8,0,0.11899200081825256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,4,128,1,float16,float16,0,0.1264639993508657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,4,128,1,float16,fp8,0,0.12552533547083536
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,8,128,1,float16,float16,0,0.07500799993673961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,8,4,128,1,fp8,fp8,0,0.12166399757067363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,8,128,1,float16,fp8,0,0.07420266668001811
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,8,128,1,fp8,fp8,0,0.07070399820804596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,1,128,1,float16,float16,0,0.06381866832574208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,2,128,1,float16,float16,0,0.06599999964237213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,1,128,1,fp8,fp8,0,0.06019733349482218
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,1,128,1,float16,fp8,0,0.0646666685740153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,2,128,1,float16,fp8,0,0.06651733318964641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,2,128,1,fp8,fp8,0,0.06257066627343495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,4,128,1,float16,fp8,0,0.06887466708819072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,8,128,1,float16,float16,0,0.043840001026789345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,4,128,1,float16,float16,0,0.06945066650708516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,8,4,128,1,fp8,fp8,0,0.06794133285681407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,8,128,1,fp8,fp8,0,0.04253333310286204
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,1,128,1,float16,float16,0,0.03879466652870178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,1,128,1,float16,fp8,0,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,8,128,1,float16,fp8,0,0.04382933179537455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,1,128,1,fp8,fp8,0,0.03623466690381368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,2,128,1,float16,float16,0,0.0391893337170283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,4,128,1,float16,fp8,0,0.04062400013208389
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,4,128,1,fp8,fp8,0,0.03947199881076813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,2,128,1,fp8,fp8,0,0.03696533292531967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,8,128,1,float16,fp8,0,0.03035199890534083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,8,128,1,float16,float16,0,0.030005333324273426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,8,128,1,fp8,fp8,0,0.02794133375088374
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,4,128,1,float16,float16,0,0.04027733455101649
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,8,2,128,1,float16,fp8,0,0.03923733284076055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,1,128,1,float16,float16,0,0.028069332242012024
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,1,128,1,float16,fp8,0,0.02792000025510788
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,2,128,1,fp8,fp8,0,0.026895999908447266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,2,128,1,float16,float16,0,0.02846933404604594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,4,128,1,fp8,fp8,0,0.028165332973003387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,2,128,1,float16,fp8,0,0.028229333460330963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,4,128,1,float16,fp8,0,0.029088000456492107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,1,128,1,fp8,fp8,0,0.029296000798543293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,8,4,128,1,float16,float16,0,0.028853334486484528
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,8,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,8,128,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,1,128,1,float16,float16,0,0.02510400116443634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,8,128,1,fp8,fp8,0,0.024501333634058636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,1,128,1,fp8,fp8,0,0.02380799998839696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,1,128,1,float16,fp8,0,0.0249439999461174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,4,128,1,float16,fp8,0,0.02587199956178665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,2,128,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,2,128,1,float16,float16,0,0.02531733363866806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,4,128,1,float16,float16,0,0.025418666501839954
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,4,128,1,fp8,fp8,0,0.024634666740894318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,8,128,1,float16,float16,0,0.023621333142121632
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,8,2,128,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,8,128,1,fp8,fp8,0,0.022826666633288067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,8,128,1,float16,fp8,0,0.023898666103680927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,2,128,1,float16,float16,0,0.024298667907714844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,1,128,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,1,128,1,float16,fp8,0,0.02447466552257538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,4,128,1,float16,float16,0,0.024031999210516613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,2,128,1,float16,fp8,0,0.024986666937669117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,1,128,1,float16,float16,0,0.023999998966852825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,4,128,1,fp8,fp8,0,0.02292266736427943
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,4,128,1,float16,fp8,0,0.02409599969784419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,8,2,128,1,fp8,fp8,0,0.022895999252796173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,8,1,128,1,float16,float16,0,0.3395093282063802
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,8,1,128,1,fp8,fp8,0,0.3293280005455017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,8,1,128,1,float16,fp8,0,0.3369973500569661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,8,2,128,1,float16,float16,0,0.3502720197041829
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,8,2,128,1,fp8,fp8,0,0.354912002881368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,8,2,128,1,float16,fp8,0,0.34545600414276123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,8,4,128,1,float16,float16,0,0.3707360029220581
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,8,4,128,1,float16,fp8,0,0.35977598031361896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,1,128,1,float16,float16,0,0.17991999785105386
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,1,128,1,float16,fp8,0,0.17937066157658896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,8,128,1,float16,float16,0,0.21853333711624146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,8,128,1,float16,fp8,0,0.2119093338648478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,8,4,128,1,fp8,fp8,0,0.39964266618092853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,1,128,1,fp8,fp8,0,0.17394665877024332
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,8,128,1,fp8,fp8,0,0.21356266736984253
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,2,128,1,float16,fp8,0,0.18352532386779785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,2,128,1,float16,float16,0,0.18382400274276733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,2,128,1,fp8,fp8,0,0.18395733833312988
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,4,128,1,float16,fp8,0,0.1906879941622416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,4,128,1,fp8,fp8,0,0.20816532770792642
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,8,4,128,1,float16,float16,0,0.19618666172027588
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,1,128,1,float16,float16,0,0.10073066751162212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,8,128,1,float16,float16,0,0.12149332960446675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,8,128,1,fp8,fp8,0,0.1163200040658315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,1,128,1,float16,fp8,0,0.10107200344403584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,8,128,1,float16,fp8,0,0.11760532855987549
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,1,128,1,fp8,fp8,0,0.09794666369756062
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,2,128,1,float16,float16,0,0.1046506663163503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,2,128,1,float16,fp8,0,0.10435199737548828
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,2,128,1,fp8,fp8,0,0.10085333387056987
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,4,128,1,float16,fp8,0,0.10916800300280254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,4,128,1,float16,float16,0,0.11010666688283284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,1,128,1,float16,float16,0,0.05702400207519531
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,8,128,1,float16,fp8,0,0.07001600166161855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,8,4,128,1,fp8,fp8,0,0.11229866743087769
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,8,128,1,float16,float16,0,0.07072533170382182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,8,128,1,fp8,fp8,0,0.06612800061702728
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,1,128,1,float16,fp8,0,0.057904000083605446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,1,128,1,fp8,fp8,0,0.0552106648683548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,2,128,1,float16,fp8,0,0.05913599828879038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,2,128,1,float16,float16,0,0.0591786652803421
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,2,128,1,fp8,fp8,0,0.058304001887639366
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,4,128,1,fp8,fp8,0,0.06262399752934773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,4,128,1,float16,fp8,0,0.06302933394908905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,8,128,1,float16,float16,0,0.04072533299525579
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,8,4,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,8,128,1,float16,fp8,0,0.04038399954636892
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,8,128,1,fp8,fp8,0,0.03937066594759623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,1,128,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,1,128,1,float16,float16,0,0.03519999980926514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,1,128,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,2,128,1,float16,fp8,0,0.03605866680542628
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,2,128,1,fp8,fp8,0,0.03402133285999298
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,4,128,1,float16,fp8,0,0.03735466549793879
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,2,128,1,float16,float16,0,0.036362667878468834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,4,128,1,float16,float16,0,0.03775466730197271
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,8,128,1,float16,float16,0,0.02698666602373123
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,8,128,1,float16,fp8,0,0.027024000883102417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,8,4,128,1,fp8,fp8,0,0.03648533423741659
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,8,128,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,1,128,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,2,128,1,float16,float16,0,0.02566933383544286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,2,128,1,float16,fp8,0,0.025583999852339428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,2,128,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,1,128,1,float16,float16,0,0.02474133421977361
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,4,128,1,float16,float16,0,0.02585600068171819
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,1,128,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,4,128,1,float16,fp8,0,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,8,128,1,float16,float16,0,0.022687998910744984
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,8,4,128,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,8,128,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,8,128,1,float16,fp8,0,0.022266666094462078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,1,128,1,float16,fp8,0,0.02201066662867864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,1,128,1,float16,float16,0,0.021695998807748158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,2,128,1,float16,fp8,0,0.02229333420594533
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,2,128,1,float16,float16,0,0.02181333303451538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,4,128,1,float16,float16,0,0.02239466706911723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,2,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,1,128,1,fp8,fp8,0,0.020661332954963047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,4,128,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,8,128,1,float16,float16,0,0.02080533280968666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,8,128,1,float16,fp8,0,0.021514666577180225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,1,128,1,float16,float16,0,0.020703999946514767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,1,128,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,8,4,128,1,float16,fp8,0,0.022485333184401195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,8,128,1,fp8,fp8,0,0.019461333751678467
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,1,128,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,2,128,1,float16,float16,0,0.020554666717847187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,2,128,1,float16,fp8,0,0.020794666061798733
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,2,128,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,4,128,1,float16,float16,0,0.020586666961510975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,4,128,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,8,128,1,float16,float16,0,0.019445333629846573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,1,128,1,float16,float16,0,0.020010666300853092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,8,128,1,float16,fp8,0,0.020175999651352566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,8,4,128,1,fp8,fp8,0,0.019674666225910187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,1,128,1,float16,fp8,0,0.02073066681623459
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,1,128,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,8,128,1,fp8,fp8,0,0.01869333287080129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,2,128,1,float16,float16,0,0.0201706662774086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,2,128,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,2,128,1,float16,fp8,0,0.020869334538777668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,4,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,4,128,1,float16,fp8,0,0.020346666375796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,8,4,128,1,float16,float16,0,0.02063999945918719
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,8,1,128,1,float16,float16,0,0.1683786710103353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,8,1,128,1,fp8,fp8,0,0.17433599630991617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,8,1,128,1,float16,fp8,0,0.1679733395576477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,8,2,128,1,float16,float16,0,0.1727786660194397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,8,2,128,1,fp8,fp8,0,0.18392000595728555
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,8,2,128,1,float16,fp8,0,0.17194666465123495
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,8,4,128,1,float16,float16,0,0.18563199043273926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,1,128,1,float16,float16,0,0.09529067079226176
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,8,128,1,float16,float16,0,0.12408000230789185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,8,128,1,float16,fp8,0,0.11731200416882832
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,1,128,1,float16,fp8,0,0.09563199679056804
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,8,4,128,1,float16,fp8,0,0.18125865856806436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,8,4,128,1,fp8,fp8,0,0.20646933714548746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,8,128,1,fp8,fp8,0,0.1146399974822998
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,1,128,1,fp8,fp8,0,0.0974079966545105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,2,128,1,float16,float16,0,0.09888000289599101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,2,128,1,fp8,fp8,0,0.10196800033251445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,2,128,1,float16,fp8,0,0.09882666667302449
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,4,128,1,float16,float16,0,0.10583999752998352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,8,128,1,float16,float16,0,0.06677333513895671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,4,128,1,fp8,fp8,0,0.11121066411336263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,8,128,1,float16,fp8,0,0.0653653343518575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,8,4,128,1,float16,fp8,0,0.10426666339238484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,1,128,1,float16,float16,0,0.05395199855168661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,1,128,1,float16,fp8,0,0.054602667689323425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,1,128,1,fp8,fp8,0,0.05459199845790863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,8,128,1,fp8,fp8,0,0.06574933230876923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,2,128,1,float16,fp8,0,0.05648000041643778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,2,128,1,float16,float16,0,0.056405335664749146
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,2,128,1,fp8,fp8,0,0.057445332407951355
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,4,128,1,float16,fp8,0,0.05963733295599619
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,8,128,1,float16,float16,0,0.038805333276589714
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,4,128,1,fp8,fp8,0,0.06306133170922597
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,8,4,128,1,float16,float16,0,0.06073066592216492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,8,128,1,float16,fp8,0,0.038032000263532005
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,1,128,1,float16,float16,0,0.03391999999682108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,8,128,1,fp8,fp8,0,0.03842133283615112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,1,128,1,float16,fp8,0,0.03442666679620743
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,1,128,1,fp8,fp8,0,0.033759998778502144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,2,128,1,float16,float16,0,0.03499733408292135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,4,128,1,float16,float16,0,0.03678400069475174
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,2,128,1,float16,fp8,0,0.0353973334034284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,4,128,1,float16,fp8,0,0.035818666219711304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,2,128,1,fp8,fp8,0,0.03411199897527695
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,8,4,128,1,fp8,fp8,0,0.036890665690104164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,1,128,1,float16,float16,0,0.024186665813128155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,8,128,1,float16,float16,0,0.025701334079106648
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,1,128,1,float16,fp8,0,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,1,128,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,8,128,1,float16,fp8,0,0.026202666262785595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,8,128,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,2,128,1,fp8,fp8,0,0.023925334215164185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,2,128,1,float16,float16,0,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,4,128,1,float16,float16,0,0.025514667232831318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,4,128,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,8,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,4,128,1,fp8,fp8,0,0.02475200096766154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,8,128,1,float16,float16,0,0.020975999534130096
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,8,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,8,2,128,1,float16,fp8,0,0.02463999887307485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,2,128,1,float16,float16,0,0.02075200031201045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,1,128,1,fp8,fp8,0,0.0204373337328434
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,1,128,1,float16,float16,0,0.02056533346573512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,1,128,1,float16,fp8,0,0.020869334538777668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,2,128,1,fp8,fp8,0,0.02040533348917961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,2,128,1,float16,fp8,0,0.020784000555674236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,4,128,1,float16,float16,0,0.02092266579469045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,4,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,8,4,128,1,fp8,fp8,0,0.02178666740655899
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,8,128,1,float16,float16,0,0.018837332725524902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,1,128,1,float16,float16,0,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,1,128,1,float16,fp8,0,0.019567999988794327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,1,128,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,8,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,8,128,1,float16,fp8,0,0.01959466685851415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,2,128,1,float16,fp8,0,0.019973333925008774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,2,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,4,128,1,float16,float16,0,0.019498666127522785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,4,128,1,float16,fp8,0,0.019637333850065868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,2,128,1,fp8,fp8,0,0.01964266722400983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,8,128,1,float16,float16,0,0.01802666609485944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,8,128,1,fp8,fp8,0,0.01874133323629697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,8,4,128,1,fp8,fp8,0,0.01951466624935468
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,1,128,1,float16,float16,0,0.018378666291634243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,8,128,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,1,128,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,1,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,2,128,1,float16,float16,0,0.018858666221300762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,2,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,2,128,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,4,128,1,float16,float16,0,0.01874133323629697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,8,128,1,float16,fp8,0,0.017957333475351334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,4,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,1,128,1,float16,float16,0,0.018735999862353008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,8,128,1,fp8,fp8,0,0.01809599995613098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,8,128,1,float16,float16,0,0.01802666609485944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,8,4,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,1,128,1,fp8,fp8,0,0.01851733277241389
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,2,128,1,float16,float16,0,0.017829333742459614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,2,128,1,fp8,fp8,0,0.018746666610240936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,1,128,1,float16,fp8,0,0.01836799954374631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,2,128,1,float16,fp8,0,0.018426666657129925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,4,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,4,128,1,float16,float16,0,0.02186133215824763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,8,4,128,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,8,1,128,1,float16,float16,0,0.11318932970364888
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,8,1,128,1,float16,fp8,0,0.11261866490046184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,8,1,128,1,fp8,fp8,0,0.12494400143623352
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,8,2,128,1,float16,fp8,0,0.1144480009873708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,8,2,128,1,float16,float16,0,0.11493333180745442
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,8,4,128,1,float16,float16,0,0.12334400415420532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,8,2,128,1,fp8,fp8,0,0.1288053294022878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,8,128,1,float16,float16,0,0.07069866855939229
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,8,128,1,float16,fp8,0,0.06885333359241486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,8,4,128,1,fp8,fp8,0,0.13904533783594766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,1,128,1,float16,float16,0,0.06385600070158641
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,8,4,128,1,float16,fp8,0,0.12004266182581584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,1,128,1,float16,fp8,0,0.06411199768384297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,8,128,1,fp8,fp8,0,0.07931200166543324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,1,128,1,fp8,fp8,0,0.06925333539644878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,2,128,1,float16,float16,0,0.06502933303515117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,2,128,1,float16,fp8,0,0.0651093324025472
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,2,128,1,fp8,fp8,0,0.07150933146476746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,4,128,1,fp8,fp8,0,0.07732800145943959
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,8,128,1,float16,float16,0,0.04042666653792063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,1,128,1,float16,float16,0,0.03822933385769526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,4,128,1,float16,float16,0,0.06818666557470958
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,8,128,1,float16,fp8,0,0.0390079990029335
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,8,4,128,1,float16,fp8,0,0.0682826687892278
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,8,128,1,fp8,fp8,0,0.04671466847260793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,1,128,1,float16,fp8,0,0.03823466598987579
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,2,128,1,float16,fp8,0,0.03904533386230469
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,1,128,1,fp8,fp8,0,0.041802664597829185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,2,128,1,float16,float16,0,0.03865066667397817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,4,128,1,float16,float16,0,0.039781334499518074
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,4,128,1,fp8,fp8,0,0.044480000933011375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,8,128,1,float16,float16,0,0.027232001225153606
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,2,128,1,fp8,fp8,0,0.04173333446184794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,8,4,128,1,float16,fp8,0,0.040037333965301514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,8,128,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,8,128,1,fp8,fp8,0,0.02869333326816559
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,1,128,1,float16,float16,0,0.025957333544890087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,1,128,1,float16,fp8,0,0.025888000925381977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,1,128,1,fp8,fp8,0,0.02741866558790207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,2,128,1,float16,float16,0,0.026346666117509205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,4,128,1,float16,float16,0,0.027189334233601887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,2,128,1,float16,fp8,0,0.02672533442576726
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,2,128,1,fp8,fp8,0,0.02788266787926356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,4,128,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,8,4,128,1,fp8,fp8,0,0.028704000016053517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,1,128,1,float16,float16,0,0.019989332805077236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,8,128,1,float16,float16,0,0.020981334149837494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,8,128,1,float16,fp8,0,0.02049066623051961
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,8,128,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,2,128,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,2,128,1,float16,float16,0,0.01998399943113327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,1,128,1,float16,fp8,0,0.019834666202465694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,2,128,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,4,128,1,float16,float16,0,0.020143999407688778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,4,128,1,float16,fp8,0,0.019808000574509304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,1,128,1,fp8,fp8,0,0.020527999848127365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,8,4,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,8,128,1,float16,float16,0,0.018058666338523228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,8,128,1,float16,fp8,0,0.018229333062966663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,2,128,1,fp8,fp8,0,0.019706666469573975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,1,128,1,float16,fp8,0,0.018320000420014065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,1,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,1,128,1,float16,float16,0,0.0179626668492953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,2,128,1,float16,float16,0,0.018618666877349217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,8,128,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,4,128,1,float16,fp8,0,0.018735999862353008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,4,128,1,float16,float16,0,0.018570666511853535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,8,4,128,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,8,128,1,float16,float16,0,0.017375999440749485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,8,128,1,float16,fp8,0,0.017743999759356182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,8,128,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,1,128,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,2,128,1,float16,float16,0,0.017984000345071156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,1,128,1,float16,float16,0,0.017690667261679966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,1,128,1,float16,fp8,0,0.017610666652520496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,4,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,2,128,1,float16,fp8,0,0.018191999445358913
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,2,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,4,128,1,float16,float16,0,0.017375999440749485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,8,4,128,1,float16,fp8,0,0.01781333362062772
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,8,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,1,128,1,float16,float16,0,0.01730666682124138
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,1,128,1,fp8,fp8,0,0.01823466643691063
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,1,128,1,float16,fp8,0,0.01794133335351944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,2,128,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,8,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,8,128,1,fp8,fp8,0,0.017717332889636356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,4,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,2,128,1,float16,fp8,0,0.01777600000301997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,2,128,1,fp8,fp8,0,0.018346666047970455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,1,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,4,128,1,float16,fp8,0,0.017621333400408428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,8,4,128,1,fp8,fp8,0,0.018373332917690277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,8,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,1,128,1,fp8,fp8,0,0.01794133335351944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,1,128,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,8,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,8,128,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,2,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,2,128,1,float16,fp8,0,0.017514667163292568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,2,128,1,fp8,fp8,0,0.017743999759356182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,4,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,4,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,8,4,128,1,fp8,fp8,0,0.01833600054184596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,8,1,128,1,fp8,fp8,0,0.10145599643389384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,8,1,128,1,float16,fp8,0,0.0792799989382426
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,8,1,128,1,float16,float16,0,0.07931733131408691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,8,2,128,1,float16,float16,0,0.08032533526420593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,8,2,128,1,float16,fp8,0,0.07961066563924153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,8,2,128,1,fp8,fp8,0,0.10358933607737224
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,8,4,128,1,float16,float16,0,0.08515200018882751
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,8,4,128,1,float16,fp8,0,0.08470400174458821
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,8,4,128,1,fp8,fp8,0,0.10801600416501363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,8,128,1,float16,fp8,0,0.04674133161703745
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,8,128,1,float16,float16,0,0.04765866696834564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,8,128,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,1,128,1,float16,float16,0,0.045594667394955955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,1,128,1,float16,fp8,0,0.04508799811204275
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,1,128,1,fp8,fp8,0,0.05752533177534739
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,2,128,1,float16,fp8,0,0.0470719983180364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,2,128,1,float16,float16,0,0.045509333411852516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,4,128,1,float16,fp8,0,0.0469813346862793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,4,128,1,fp8,fp8,0,0.06006399790445963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,4,128,1,float16,float16,0,0.0476800004641215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,8,2,128,1,fp8,fp8,0,0.057855998476346336
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,8,128,1,float16,fp8,0,0.030213333666324615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,8,128,1,fp8,fp8,0,0.037290667494138084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,1,128,1,float16,float16,0,0.029509333272775013
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,1,128,1,float16,fp8,0,0.03032533327738444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,8,128,1,float16,float16,0,0.02962133288383484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,1,128,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,2,128,1,float16,float16,0,0.03031466652949651
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,4,128,1,float16,float16,0,0.03142933299144109
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,2,128,1,fp8,fp8,0,0.036687999963760376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,2,128,1,float16,fp8,0,0.030320001145203907
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,4,128,1,float16,fp8,0,0.031199999153614044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,8,4,128,1,fp8,fp8,0,0.037632000943024956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,8,128,1,float16,fp8,0,0.02218666672706604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,1,128,1,float16,float16,0,0.021920000513394673
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,8,128,1,float16,float16,0,0.02186666677395503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,8,128,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,1,128,1,float16,fp8,0,0.022346665461858112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,2,128,1,float16,fp8,0,0.02219199885924657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,2,128,1,fp8,fp8,0,0.024933333198229473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,2,128,1,float16,float16,0,0.021925332645575207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,1,128,1,fp8,fp8,0,0.024720000723997753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,4,128,1,float16,float16,0,0.0225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,8,128,1,float16,float16,0,0.017952000101407368
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,8,128,1,float16,fp8,0,0.01836266616980235
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,4,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,8,128,1,fp8,fp8,0,0.020186666399240494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,8,4,128,1,float16,fp8,0,0.022240000466505688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,1,128,1,float16,float16,0,0.017456000049908955
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,1,128,1,fp8,fp8,0,0.019823999454577763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,1,128,1,float16,fp8,0,0.018218666315078735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,2,128,1,float16,fp8,0,0.018138666947682697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,2,128,1,float16,float16,0,0.017786666750907898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,4,128,1,float16,float16,0,0.017786666750907898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,2,128,1,fp8,fp8,0,0.01964266722400983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,8,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,4,128,1,fp8,fp8,0,0.01995733380317688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,8,4,128,1,float16,fp8,0,0.017877332866191864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,8,128,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,1,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,8,128,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,1,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,1,128,1,fp8,fp8,0,0.01842133328318596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,2,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,4,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,2,128,1,float16,fp8,0,0.017765333255132038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,4,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,2,128,1,fp8,fp8,0,0.018687999496857326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,8,128,1,float16,float16,0,0.01621333385507266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,8,128,1,fp8,fp8,0,0.0182239996890227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,8,128,1,float16,fp8,0,0.016517333686351776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,8,4,128,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,1,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,2,128,1,fp8,fp8,0,0.01855466639002164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,1,128,1,fp8,fp8,0,0.018277333428462345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,2,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,1,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,4,128,1,float16,float16,0,0.016549333930015564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,2,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,8,128,1,float16,float16,0,0.01617066686352094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,4,128,1,fp8,fp8,0,0.018288000176350277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,8,4,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,8,128,1,float16,fp8,0,0.01646399994691213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,8,128,1,fp8,fp8,0,0.018160000443458557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,1,128,1,float16,float16,0,0.016303999970356624
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,2,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,1,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,2,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,4,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,4,128,1,float16,float16,0,0.016271999726692837
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,1,128,1,fp8,fp8,0,0.01817600056529045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,2,128,1,fp8,fp8,0,0.01848000039656957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,8,128,1,float16,float16,0,0.015583999454975128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,8,4,128,1,fp8,fp8,0,0.01807466646035512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,8,128,1,fp8,fp8,0,0.017829333742459614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,1,128,1,float16,float16,0,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,1,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,8,128,1,float16,fp8,0,0.016016000260909397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,2,128,1,float16,fp8,0,0.016421332955360413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,1,128,1,fp8,fp8,0,0.017743999759356182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,2,128,1,fp8,fp8,0,0.017786666750907898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,4,128,1,float16,float16,0,0.01594666639963786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,2,128,1,float16,float16,0,0.016336000214020412
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,4,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,8,4,128,1,fp8,fp8,0,0.01844266677896182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,8,1,128,1,float16,fp8,0,0.0689279983441035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,8,1,128,1,float16,float16,0,0.06816533207893372
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,8,2,128,1,float16,fp8,0,0.0683840016523997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,8,4,128,1,float16,float16,0,0.06992533306280772
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,8,2,128,1,float16,float16,0,0.06832533578077953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,8,4,128,1,float16,fp8,0,0.06965333223342896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,8,2,128,1,fp8,fp8,0,0.08981333176294963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,8,1,128,1,fp8,fp8,0,0.08851200342178345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,8,4,128,1,fp8,fp8,0,0.09265599648157756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,8,128,1,float16,fp8,0,0.03957333415746689
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,2,128,1,float16,fp8,0,0.042133331298828125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,1,128,1,float16,fp8,0,0.04115733255942663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,8,128,1,fp8,fp8,0,0.05243200063705444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,8,128,1,float16,float16,0,0.03923200070858002
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,2,128,1,float16,float16,0,0.04101333270470301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,1,128,1,float16,float16,0,0.040847999354203544
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,1,128,1,fp8,fp8,0,0.051728000243504844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,4,128,1,float16,float16,0,0.04194133480389913
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,2,128,1,fp8,fp8,0,0.05221333106358846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,4,128,1,float16,fp8,0,0.04196799794832865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,8,4,128,1,fp8,fp8,0,0.05293866495291392
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,1,128,1,float16,fp8,0,0.028117333849271137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,8,128,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,8,128,1,fp8,fp8,0,0.03332266708215078
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,8,128,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,1,128,1,float16,float16,0,0.027717334528764088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,1,128,1,fp8,fp8,0,0.03316266586383184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,2,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,2,128,1,float16,fp8,0,0.027888000011444092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,2,128,1,fp8,fp8,0,0.033674667278925575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,4,128,1,float16,fp8,0,0.028768000503381092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,8,128,1,float16,fp8,0,0.02067733307679494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,4,128,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,1,128,1,float16,float16,0,0.020938667158285778
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,8,128,1,fp8,fp8,0,0.023711999257405598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,1,128,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,8,4,128,1,fp8,fp8,0,0.03384533276160558
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,8,128,1,float16,float16,0,0.02086399992307027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,2,128,1,float16,float16,0,0.020874666670958202
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,4,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,2,128,1,fp8,fp8,0,0.023898666103680927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,1,128,1,fp8,fp8,0,0.02380799998839696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,4,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,2,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,8,128,1,float16,float16,0,0.017445333302021027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,8,128,1,fp8,fp8,0,0.019424000134070713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,8,128,1,float16,fp8,0,0.017893332988023758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,1,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,8,4,128,1,fp8,fp8,0,0.02372266600529353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,1,128,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,1,128,1,float16,fp8,0,0.0176959993938605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,2,128,1,float16,float16,0,0.017093333105246227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,2,128,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,4,128,1,float16,float16,0,0.017557332913080852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,4,128,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,8,128,1,float16,float16,0,0.016783999900023144
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,2,128,1,float16,fp8,0,0.017792000124851864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,8,128,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,8,128,1,fp8,fp8,0,0.018351999421914417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,1,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,8,4,128,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,1,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,1,128,1,fp8,fp8,0,0.018613333503405254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,2,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,4,128,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,2,128,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,2,128,1,fp8,fp8,0,0.018186666071414948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,4,128,1,float16,float16,0,0.01624533285697301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,8,128,1,float16,float16,0,0.016154666741689045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,8,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,8,128,1,fp8,fp8,0,0.018160000443458557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,8,4,128,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,1,128,1,float16,float16,0,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,2,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,1,128,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,2,128,1,fp8,fp8,0,0.018053332964579265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,1,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,4,128,1,fp8,fp8,0,0.018373332917690277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,4,128,1,float16,float16,0,0.01655999943614006
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,2,128,1,float16,float16,0,0.023024000227451324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,8,4,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,8,128,1,float16,float16,0,0.01605333387851715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,1,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,8,128,1,float16,fp8,0,0.01617066686352094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,8,128,1,fp8,fp8,0,0.017765333255132038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,1,128,1,fp8,fp8,0,0.017935999979575474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,1,128,1,float16,float16,0,0.016501333564519882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,2,128,1,float16,float16,0,0.01642666632930438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,2,128,1,fp8,fp8,0,0.018112000077962875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,4,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,2,128,1,float16,fp8,0,0.016480000068744022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,4,128,1,fp8,fp8,0,0.018165333817402523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,8,4,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,8,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,1,128,1,float16,fp8,0,0.01643199970324834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,8,128,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,1,128,1,float16,float16,0,0.015552000453074774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,8,128,1,float16,fp8,0,0.015829333414634068
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,2,128,1,fp8,fp8,0,0.01785600061217944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,2,128,1,float16,float16,0,0.016208000481128693
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,2,128,1,float16,fp8,0,0.016048000504573185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,1,128,1,fp8,fp8,0,0.017701332767804463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,4,128,1,float16,float16,0,0.016250666230916977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,4,128,1,fp8,fp8,0,0.017850667238235474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,8,4,128,1,float16,fp8,0,0.016048000504573185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,128,1,float16,float16,0,4.1202131907145185
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,128,1,float16,float16,0,2.0652213096618652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,128,1,float16,fp8,0,4.261839866638184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,128,1,fp8,fp8,0,2.7069546381632485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,128,1,fp8,fp8,0,2.7215681076049805
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,128,1,float16,fp8,0,2.033679962158203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,128,1,float16,float16,0,4.01580810546875
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,128,1,float16,fp8,0,4.148394584655762
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,128,1,float16,float16,0,2.0938026110331216
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,128,1,float16,fp8,0,2.0855466524759927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,128,1,fp8,fp8,0,1.4178293546040852
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,128,1,float16,float16,0,2.0745760599772134
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,128,1,float16,float16,0,1.103882630666097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,128,1,float16,fp8,0,2.0664587020874023
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,128,1,fp8,fp8,0,1.4332159360249836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,128,1,fp8,fp8,0,1.4208319981892903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,128,1,float16,fp8,0,1.1124693552652996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,128,1,fp8,fp8,0,0.6999306678771973
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,128,1,float16,float16,0,1.1245013078053792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,128,1,float16,fp8,0,1.1182453632354736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,128,1,fp8,fp8,0,0.6981653372446696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,128,1,float16,float16,0,1.1255573431650798
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,128,1,float16,fp8,0,1.1265973250071208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,128,1,float16,float16,0,0.5179466803868612
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,128,1,float16,fp8,0,0.5163360039393107
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,128,1,float16,float16,0,0.519050677617391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,128,1,fp8,fp8,0,0.7033653259277344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,128,1,fp8,fp8,0,0.37219734986623126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,128,1,fp8,fp8,0,0.3713546593983968
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,128,1,float16,fp8,0,0.5167680184046427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,128,1,float16,fp8,0,0.5189599990844727
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,128,1,float16,float16,0,0.5228213469187418
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,128,1,fp8,fp8,0,0.3755626678466797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,128,1,float16,float16,0,2.341119925181071
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,128,1,fp8,fp8,0,1.6261173884073894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,128,1,float16,fp8,0,2.330714702606201
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,128,1,float16,float16,0,2.3498133023579917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,128,1,float16,float16,0,1.2353386878967285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,128,1,float16,fp8,0,1.2364479700724285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,128,1,fp8,fp8,0,1.644938627878825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,128,1,float16,float16,0,1.240613301595052
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,128,1,float16,fp8,0,2.340170701344808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,128,1,fp8,fp8,0,0.8150186538696289
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,128,1,float16,fp8,0,1.2335093021392822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,128,1,fp8,fp8,0,0.8076159954071045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,128,1,float16,float16,0,1.250928004582723
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,128,1,float16,fp8,0,1.2496480147043865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,128,1,fp8,fp8,0,0.8188532988230387
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,128,1,float16,float16,0,0.5864266554514567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,128,1,float16,fp8,0,0.5830133358637491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,128,1,fp8,fp8,0,0.42685866355895996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,128,1,float16,float16,0,0.5870933135350546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,128,1,float16,fp8,0,0.5841066837310791
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,128,1,fp8,fp8,0,0.42530667781829834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,128,1,float16,float16,0,0.5945493380228678
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,128,1,float16,fp8,0,0.5895146528879801
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,128,1,fp8,fp8,0,0.42716264724731445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,128,1,float16,float16,0,0.33608531951904297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,128,1,float16,fp8,0,0.3325813412666321
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,128,1,float16,float16,0,0.3341493209203084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,128,1,fp8,fp8,0,0.24748265743255615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,128,1,float16,fp8,0,0.33555734157562256
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,128,1,fp8,fp8,0,0.24448533852895102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,128,1,float16,float16,0,0.3374933401743571
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,128,1,float16,fp8,0,0.3352320194244385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,128,1,fp8,fp8,0,0.2481173276901245
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,128,1,float16,float16,0,1.6848427454630535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,128,1,float16,fp8,0,1.6755199432373047
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,128,1,fp8,fp8,0,1.193557341893514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,128,1,float16,float16,0,0.8938666979471842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,128,1,float16,fp8,0,1.703829288482666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,128,1,fp8,fp8,0,1.2046240170796711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,128,1,float16,float16,0,1.704037348429362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,128,1,float16,fp8,0,0.8896160125732422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,128,1,fp8,fp8,0,0.5984106858571371
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,128,1,fp8,fp8,0,0.589679996172587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,128,1,float16,float16,0,0.901093324025472
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,128,1,float16,float16,0,0.9133386611938477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,128,1,float16,fp8,0,0.9071199893951416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,128,1,float16,float16,0,0.43110398451487225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,128,1,float16,fp8,0,0.8967466354370117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,128,1,fp8,fp8,0,0.5998560190200806
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,128,1,float16,fp8,0,0.4293706814448039
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,128,1,float16,fp8,0,0.43248534202575684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,128,1,fp8,fp8,0,0.3184960087140401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,128,1,float16,float16,0,0.43398932615915936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,128,1,fp8,fp8,0,0.3208693265914917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,128,1,float16,float16,0,0.43826134999593097
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,128,1,float16,fp8,0,0.4341333309809367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,128,1,float16,float16,0,0.2462773323059082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,128,1,fp8,fp8,0,0.32225600878397626
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,128,1,float16,fp8,0,0.24595733483632407
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,128,1,fp8,fp8,0,0.1869279940923055
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,128,1,float16,fp8,0,0.2435093323389689
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,128,1,float16,float16,0,0.24579199155171713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,128,1,fp8,fp8,0,0.1835520068804423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,128,1,float16,fp8,0,0.24627200762430826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,128,1,float16,float16,0,0.2469493349393209
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,128,1,fp8,fp8,0,0.18577067057291666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,128,1,float16,float16,0,2.2066027323404946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,128,1,float16,fp8,0,2.1816372871398926
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,128,1,fp8,fp8,0,1.5595626831054688
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,128,1,float16,fp8,0,1.1351306438446045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,128,1,float16,fp8,0,2.1731786727905273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,128,1,float16,float16,0,1.1350026925404866
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,128,1,fp8,fp8,0,1.5795787175496419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,128,1,float16,float16,0,2.1905760765075684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,128,1,float16,float16,0,1.1482826868693035
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,128,1,fp8,fp8,0,0.8355573018391927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,128,1,float16,fp8,0,1.128058671951294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,128,1,fp8,fp8,0,0.8226986726125082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,128,1,float16,float16,0,1.1443466345469158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,128,1,float16,float16,0,0.6135466496149699
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,128,1,fp8,fp8,0,0.8284853299458822
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,128,1,float16,fp8,0,1.1363360087076824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,128,1,float16,fp8,0,0.6164106527964274
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,128,1,fp8,fp8,0,0.4188266595204671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,128,1,float16,float16,0,0.6183146635691324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,128,1,fp8,fp8,0,0.40969598293304443
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,128,1,float16,fp8,0,0.6166186730066935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,128,1,float16,fp8,0,0.6178986628850301
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,128,1,float16,float16,0,0.623631993929545
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,128,1,fp8,fp8,0,0.41627732912699383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,128,1,float16,float16,0,0.2910933295885722
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,128,1,float16,float16,0,0.29021867116292316
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,128,1,float16,fp8,0,0.29120532671610516
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,128,1,fp8,fp8,0,0.21887467304865518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,128,1,float16,fp8,0,0.28885332743326825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,128,1,fp8,fp8,0,0.2237173318862915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,128,1,float16,fp8,0,0.29028799136479694
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,128,1,float16,float16,0,0.2925493319829305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,128,1,fp8,fp8,0,0.22270933787027994
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,128,1,float16,float16,0,0.16004799803098044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,128,1,float16,fp8,0,0.1595253348350525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,128,1,fp8,fp8,0,0.12533866365750632
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,128,1,float16,float16,0,0.15993066628774008
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,128,1,float16,fp8,0,0.15770133336385092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,128,1,fp8,fp8,0,0.12196266651153564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,128,1,fp8,fp8,0,0.12363732854525249
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,128,1,float16,fp8,0,0.15893866618474325
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,128,1,float16,float16,0,0.15973333517710367
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,128,1,float16,float16,0,1.332319974899292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,128,1,float16,fp8,0,1.3023040294647217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,128,1,fp8,fp8,0,0.9755307038625082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,128,1,float16,float16,0,0.7040639718373617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,128,1,float16,fp8,0,0.6930133501688639
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,128,1,float16,fp8,0,1.3191306591033936
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,128,1,fp8,fp8,0,0.9934613704681396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,128,1,float16,float16,0,1.3253066539764404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,128,1,float16,float16,0,0.697381337483724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,128,1,fp8,fp8,0,0.5079840024312338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,128,1,float16,fp8,0,0.6923306783040365
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,128,1,fp8,fp8,0,0.48950934410095215
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,128,1,float16,float16,0,0.7037920157114664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,128,1,float16,fp8,0,0.6966346899668375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,128,1,fp8,fp8,0,0.4997653166453044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,128,1,float16,fp8,0,0.33741867542266846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,128,1,float16,float16,0,0.33882665634155273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,128,1,float16,float16,0,0.33638401826222736
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,128,1,fp8,fp8,0,0.2687679926554362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,128,1,float16,fp8,0,0.3336533308029175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,128,1,float16,float16,0,0.3403466542561849
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,128,1,fp8,fp8,0,0.25994666417439777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,128,1,float16,fp8,0,0.33642133076985675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,128,1,fp8,fp8,0,0.26366400718688965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,128,1,fp8,fp8,0,0.15416533748308817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,128,1,float16,fp8,0,0.1941866676012675
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,128,1,float16,float16,0,0.19021334250768027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,128,1,float16,float16,0,0.19208532571792603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,128,1,float16,fp8,0,0.18864534298578897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,128,1,float16,float16,0,0.1924053430557251
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,128,1,float16,fp8,0,0.1920479933420817
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,128,1,fp8,fp8,0,0.14671466747919717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,128,1,float16,fp8,0,0.12039466698964436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,128,1,fp8,fp8,0,0.15180800358454385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,128,1,fp8,fp8,0,0.09442666172981262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,128,1,float16,float16,0,0.12065066893895467
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,128,1,fp8,fp8,0,0.09265599648157756
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,128,1,float16,fp8,0,0.12034133076667786
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,128,1,float16,float16,0,0.12135466933250427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,128,1,float16,float16,0,0.12145066261291504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,128,1,float16,fp8,0,0.12116266290346782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,128,1,fp8,fp8,0,0.09398933251698811
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,128,1,float16,fp8,0,1.2780853112538655
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,128,1,fp8,fp8,0,0.9986666838328043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,128,1,float16,float16,0,1.302634636561076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,128,1,float16,float16,0,0.6851147015889486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,128,1,float16,fp8,0,0.6793226401011149
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,128,1,float16,float16,0,1.3218453725179036
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,128,1,fp8,fp8,0,1.0159093538920085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,128,1,float16,fp8,0,1.288101355234782
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,128,1,float16,float16,0,0.6761653423309326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,128,1,fp8,fp8,0,0.5239573319753011
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,128,1,float16,fp8,0,0.6630986531575521
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,128,1,float16,fp8,0,0.6712213357289633
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,128,1,float16,float16,0,0.6845333576202393
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,128,1,fp8,fp8,0,0.5448160171508789
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,128,1,fp8,fp8,0,0.5325813293457031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,128,1,float16,float16,0,0.369269331296285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,128,1,float16,fp8,0,0.36566932996114093
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,128,1,float16,fp8,0,0.3609386682510376
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,128,1,fp8,fp8,0,0.26605866352717084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,128,1,fp8,fp8,0,0.27697600920995075
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,128,1,float16,float16,0,0.37068267663319904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,128,1,float16,float16,0,0.3640213410059611
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,128,1,float16,fp8,0,0.3651946783065796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,128,1,fp8,fp8,0,0.2715733249982198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,128,1,float16,float16,0,0.18051199118296304
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,128,1,float16,float16,0,0.17593065897623697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,128,1,float16,fp8,0,0.17915733655293783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,128,1,fp8,fp8,0,0.1516639987627665
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,128,1,float16,fp8,0,0.17352533340454102
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,128,1,float16,float16,0,0.178165336449941
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,128,1,fp8,fp8,0,0.1432213286558787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,128,1,fp8,fp8,0,0.14630400141080221
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,128,1,float16,fp8,0,0.17646400133768717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,128,1,float16,fp8,0,0.09946133693059285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,128,1,float16,float16,0,0.09956266482671101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,128,1,fp8,fp8,0,0.08589866757392883
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,128,1,float16,float16,0,0.09732266267140706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,128,1,float16,fp8,0,0.0958186686038971
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,128,1,fp8,fp8,0,0.07969599962234497
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,128,1,float16,float16,0,0.09732799728711446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,128,1,fp8,fp8,0,0.08197333415349324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,128,1,float16,fp8,0,0.09687466422716777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,128,1,float16,float16,0,0.08534933129946391
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,128,1,float16,fp8,0,0.08515200018882751
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,128,1,float16,float16,0,0.08538132905960083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,128,1,fp8,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,128,1,fp8,fp8,0,0.06728533407052358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,128,1,float16,float16,0,0.08598400155703227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,128,1,fp8,fp8,0,0.06760533154010773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,128,1,float16,fp8,0,0.08566400408744812
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,128,1,float16,fp8,0,0.08515733480453491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,128,1,float16,float16,0,0.8196586767832438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,128,1,fp8,fp8,0,0.6530186732610067
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,128,1,float16,fp8,0,0.7987893422444662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,128,1,float16,float16,0,0.43597865104675293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,128,1,float16,float16,0,0.8219199975331625
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,128,1,float16,fp8,0,0.8098719914754232
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,128,1,fp8,fp8,0,0.6647679805755615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,128,1,float16,fp8,0,0.43025068442026776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,128,1,float16,float16,0,0.42962666352589923
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,128,1,float16,fp8,0,0.4236160119374593
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,128,1,fp8,fp8,0,0.35468800862630206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,128,1,fp8,fp8,0,0.3306293288866679
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,128,1,float16,float16,0,0.4325066804885864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,128,1,float16,fp8,0,0.42747199535369873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,128,1,fp8,fp8,0,0.3399146795272827
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,128,1,float16,fp8,0,0.22011733055114746
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,128,1,float16,float16,0,0.21887999773025513
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,128,1,fp8,fp8,0,0.18662399053573608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,128,1,float16,float16,0,0.2116373380025228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,128,1,float16,fp8,0,0.20864532391230264
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,128,1,fp8,fp8,0,0.1758506695429484
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,128,1,float16,float16,0,0.21500267585118613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,128,1,float16,fp8,0,0.21355199813842773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,128,1,float16,fp8,0,0.12226133545239766
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,128,1,float16,float16,0,0.12220799922943115
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,128,1,fp8,fp8,0,0.10610666871070862
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,128,1,fp8,fp8,0,0.18074132998784384
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,128,1,float16,float16,0,0.11820266644159953
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,128,1,float16,fp8,0,0.11577600240707397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,128,1,fp8,fp8,0,0.09757333000500996
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,128,1,float16,float16,0,0.11950400471687317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,128,1,float16,fp8,0,0.11948266625404358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,128,1,float16,float16,0,0.07373866438865662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,128,1,float16,fp8,0,0.07396266857783
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,128,1,fp8,fp8,0,0.06323199967543285
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,128,1,float16,float16,0,0.07322666545708974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,128,1,fp8,fp8,0,0.1018933355808258
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,128,1,float16,fp8,0,0.07266133526961009
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,128,1,fp8,fp8,0,0.06005333364009857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,128,1,float16,float16,0,0.07397333284219106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,128,1,float16,fp8,0,0.07332799832026164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,128,1,fp8,fp8,0,0.06051200131575266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,128,1,float16,fp8,0,0.06736533343791962
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,128,1,float16,float16,0,0.0673333356777827
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,128,1,float16,float16,0,0.0681279997030894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,128,1,fp8,fp8,0,0.054560000697771706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,128,1,float16,fp8,0,0.0676693320274353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,128,1,fp8,fp8,0,0.05423999826113383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,128,1,float16,float16,0,0.06858666737874348
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,128,1,float16,fp8,0,0.06769066552321117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,128,1,fp8,fp8,0,0.05518400172392527
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,128,1,float16,float16,0,0.8849759896596273
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,128,1,float16,fp8,0,0.832581361134847
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,128,1,fp8,fp8,0,0.7432746887207031
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,128,1,float16,float16,0,0.47755201657613117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,128,1,float16,float16,0,0.9024853706359863
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,128,1,float16,fp8,0,0.8451626300811768
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,128,1,float16,fp8,0,0.4508533477783203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,128,1,fp8,fp8,0,0.7635467052459717
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,128,1,float16,float16,0,0.4598986705144246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,128,1,float16,fp8,0,0.43594666322072345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,128,1,fp8,fp8,0,0.38741334279378253
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,128,1,fp8,fp8,0,0.4096533457438151
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,128,1,float16,float16,0,0.2548000017801921
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,128,1,float16,float16,0,0.468176007270813
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,128,1,float16,fp8,0,0.442469318707784
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,128,1,fp8,fp8,0,0.3991413513819377
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,128,1,float16,float16,0,0.2453440030415853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,128,1,float16,fp8,0,0.23418132464090982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,128,1,float16,fp8,0,0.2437973419825236
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,128,1,fp8,fp8,0,0.19646400213241577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,128,1,fp8,fp8,0,0.2095306714375814
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,128,1,float16,float16,0,0.24804800748825073
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,128,1,float16,fp8,0,0.23857067028681436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,128,1,fp8,fp8,0,0.2016213337580363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,128,1,float16,float16,0,0.12728533148765564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,128,1,fp8,fp8,0,0.11545600493748982
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,128,1,float16,fp8,0,0.12410133083661397
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,128,1,float16,float16,0,0.11982400218645732
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,128,1,float16,fp8,0,0.11726933717727661
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,128,1,fp8,fp8,0,0.10726400216420491
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,128,1,float16,float16,0,0.12211199601491292
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,128,1,float16,fp8,0,0.11998400092124939
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,128,1,fp8,fp8,0,0.11110400160153706
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,128,1,float16,float16,0,0.07082666456699371
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,128,1,float16,fp8,0,0.06981866558392842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,128,1,fp8,fp8,0,0.06693333387374878
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,128,1,float16,fp8,0,0.06569600105285645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,128,1,float16,float16,0,0.0662666658560435
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,128,1,fp8,fp8,0,0.05913066864013672
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,128,1,float16,float16,0,0.06715733309586842
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,128,1,float16,fp8,0,0.06608533362547557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,128,1,fp8,fp8,0,0.06186666587988535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,128,1,float16,float16,0,0.05489066739877065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,128,1,float16,fp8,0,0.054655998945236206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,128,1,fp8,fp8,0,0.04667200148105621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,128,1,float16,float16,0,0.05487466851870219
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,128,1,float16,fp8,0,0.053583999474843345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,128,1,fp8,fp8,0,0.04587199787298838
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,128,1,float16,float16,0,0.05448000133037567
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,128,1,float16,fp8,0,0.05420800050099691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,128,1,fp8,fp8,0,0.04613333443800608
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,128,1,float16,float16,0,0.0495306650797526
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,128,1,float16,float16,0,0.05067733426888784
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,128,1,fp8,fp8,0,0.04222933451334635
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,128,1,float16,fp8,0,0.05036800106366476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,128,1,fp8,fp8,0,0.041893333196640015
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,128,1,float16,float16,0,0.05053333441416422
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,128,1,fp8,fp8,0,0.04213866591453552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,128,1,float16,fp8,0,0.05086400111516317
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,128,1,float16,float16,0,0.5943839947382609
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,128,1,float16,fp8,0,0.539903998374939
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,128,1,fp8,fp8,0,0.5156213442484537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,128,1,float16,float16,0,0.32505067189534503
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,128,1,float16,fp8,0,0.30316267410914105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,128,1,float16,float16,0,0.606112003326416
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,128,1,float16,fp8,0,0.5506240129470825
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,128,1,fp8,fp8,0,0.5237119992574056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,128,1,float16,float16,0,0.3083626627922058
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,128,1,fp8,fp8,0,0.26174400250116986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,128,1,float16,float16,0,0.31575467189153034
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,128,1,fp8,fp8,0,0.28573866685231525
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,128,1,float16,fp8,0,0.28571732838948566
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,128,1,float16,fp8,0,0.2938986619313558
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,128,1,float16,float16,0,0.16620266437530518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,128,1,fp8,fp8,0,0.26799466212590534
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,128,1,float16,fp8,0,0.16074666380882263
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,128,1,float16,float16,0,0.15226667126019797
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,128,1,float16,fp8,0,0.14757333199183145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,128,1,fp8,fp8,0,0.1495519975821177
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,128,1,float16,float16,0,0.1580586632092794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,128,1,float16,fp8,0,0.15110933780670166
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,128,1,fp8,fp8,0,0.13807466626167297
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,128,1,fp8,fp8,0,0.1430186629295349
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,128,1,float16,float16,0,0.08925867080688477
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,128,1,float16,fp8,0,0.08877866466840108
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,128,1,fp8,fp8,0,0.08640000224113464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,128,1,float16,fp8,0,0.08085333307584126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,128,1,fp8,fp8,0,0.07574933270613353
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,128,1,float16,float16,0,0.0844693382581075
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,128,1,float16,fp8,0,0.08416533470153809
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,128,1,fp8,fp8,0,0.0817440003156662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,128,1,float16,float16,0,0.05167999863624573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,128,1,float16,fp8,0,0.052058666944503784
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,128,1,float16,fp8,0,0.0490880012512207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,128,1,fp8,fp8,0,0.04952000081539154
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,128,1,float16,float16,0,0.05064533154169718
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,128,1,float16,float16,0,0.05087466537952423
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,128,1,fp8,fp8,0,0.04543466866016388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,128,1,float16,fp8,0,0.04975999891757965
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,128,1,fp8,fp8,0,0.046495998899141945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,128,1,float16,fp8,0,0.0448586642742157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,128,1,float16,float16,0,0.045040001471837364
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,128,1,fp8,fp8,0,0.039503999054431915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,128,1,float16,float16,0,0.04530666768550873
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,128,1,float16,fp8,0,0.044581333796183266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,128,1,float16,float16,0,0.04527466495831808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,128,1,fp8,fp8,0,0.038405333956082664
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,128,1,float16,fp8,0,0.044624000787734985
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,128,1,fp8,fp8,0,0.03920000046491623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,128,1,float16,float16,0,0.041135999063650765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,128,1,float16,fp8,0,0.04119999955097834
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,128,1,fp8,fp8,0,0.03615466753641764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,128,1,float16,float16,0,0.04188266893227895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,128,1,float16,fp8,0,0.04161600023508072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,128,1,float16,float16,0,0.04227200150489807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,128,1,float16,fp8,0,0.041984001795450844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,128,1,fp8,fp8,0,0.035743998984495796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,128,1,fp8,fp8,0,0.035375999907652535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,128,1,float16,float16,0,0.53602135181427
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,128,1,float16,fp8,0,0.5346879959106445
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,128,1,fp8,fp8,0,0.46800001462300617
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,128,1,float16,float16,0,0.3051519989967346
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,128,1,float16,fp8,0,0.2957493265469869
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,128,1,float16,fp8,0,0.5418186585108439
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,128,1,float16,float16,0,0.5499200026194254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,128,1,fp8,fp8,0,0.5000160137812296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,128,1,float16,float16,0,0.28327999512354535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,128,1,float16,fp8,0,0.28039999802907306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,128,1,fp8,fp8,0,0.24615466594696045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,128,1,fp8,fp8,0,0.27985600630442303
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,128,1,float16,float16,0,0.16778665781021118
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,128,1,float16,fp8,0,0.28428266445795697
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,128,1,fp8,fp8,0,0.2568746606508891
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,128,1,float16,float16,0,0.2866453329722087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,128,1,float16,float16,0,0.15480533242225647
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,128,1,float16,fp8,0,0.1646293302377065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,128,1,fp8,fp8,0,0.1415733297665914
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,128,1,float16,fp8,0,0.15296000242233276
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,128,1,fp8,fp8,0,0.12947199741999307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,128,1,float16,float16,0,0.15833066900571188
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,128,1,float16,fp8,0,0.15602667133013406
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,128,1,fp8,fp8,0,0.13238933682441711
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,128,1,float16,float16,0,0.08819199601809184
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,128,1,float16,fp8,0,0.08714133501052856
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,128,1,float16,float16,0,0.07959466675917308
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,128,1,fp8,fp8,0,0.07846400141716003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,128,1,float16,fp8,0,0.07945066690444946
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,128,1,fp8,fp8,0,0.07295466462771098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,128,1,float16,float16,0,0.08130666613578796
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,128,1,float16,fp8,0,0.08075200021266937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,128,1,float16,float16,0,0.05065066615740458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,128,1,fp8,fp8,0,0.07572799921035767
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,128,1,float16,fp8,0,0.04945066571235657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,128,1,float16,float16,0,0.0460746685663859
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,128,1,fp8,fp8,0,0.048453330993652344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,128,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,128,1,float16,float16,0,0.04655466477076212
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,128,1,fp8,fp8,0,0.044309332966804504
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,128,1,float16,float16,0,0.03734933336575826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,128,1,fp8,fp8,0,0.041946664452552795
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,128,1,float16,fp8,0,0.03696533292531967
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,128,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,128,1,float16,float16,0,0.03568000098069509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,128,1,float16,fp8,0,0.04675200084845225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,128,1,fp8,fp8,0,0.031125334401925404
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,128,1,float16,float16,0,0.035930665830771126
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,128,1,float16,fp8,0,0.03542399903138479
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,128,1,fp8,fp8,0,0.03155199935038885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,128,1,float16,float16,0,0.03219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,128,1,float16,fp8,0,0.031930667658646904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,128,1,float16,float16,0,0.03201066702604294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,128,1,float16,fp8,0,0.03549333413441976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,128,1,float16,fp8,0,0.032431999842325844
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,128,1,fp8,fp8,0,0.02845333268245061
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,128,1,fp8,fp8,0,0.03084266682465871
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,128,1,float16,float16,0,0.03250666707754135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,128,1,float16,fp8,0,0.03253866732120514
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,128,1,float16,float16,0,0.030106666187445324
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,128,1,fp8,fp8,0,0.02881066749493281
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,128,1,fp8,fp8,0,0.027002667387326557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,128,1,float16,float16,0,0.030799999833106995
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,128,1,float16,fp8,0,0.03121600051720937
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,128,1,float16,fp8,0,0.030213333666324615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,128,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,128,1,fp8,fp8,0,0.026799999177455902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,128,1,float16,float16,0,0.030826665461063385
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,128,1,fp8,fp8,0,0.02714666724205017
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,4,1,128,1,float16,float16,0,0.4256426493326823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,4,1,128,1,fp8,fp8,0,0.39125335216522217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,4,4,128,1,float16,float16,0,0.25084267059961957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,4,1,128,1,float16,fp8,0,0.4222453435262044
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,4,4,128,1,float16,fp8,0,0.2424959937731425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,4,2,128,1,float16,float16,0,0.437882661819458
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,4,2,128,1,float16,fp8,0,0.43197866280873615
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,512,4,2,128,1,fp8,fp8,0,0.4230080048243205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,4,1,128,1,float16,fp8,0,0.22247999906539917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,4,1,128,1,float16,float16,0,0.2244266668955485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,4,1,128,1,fp8,fp8,0,0.2050079902013143
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,4,2,128,1,float16,float16,0,0.23153066635131836
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,4,4,128,1,float16,float16,0,0.13758400082588196
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,4,4,128,1,fp8,fp8,0,0.23736000061035156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,4,2,128,1,float16,fp8,0,0.2267413338025411
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,512,4,2,128,1,fp8,fp8,0,0.2174933354059855
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,4,1,128,1,float16,float16,0,0.12276800473531087
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,4,4,128,1,float16,fp8,0,0.13277332981427512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,4,1,128,1,float16,fp8,0,0.12291199962298076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,4,1,128,1,fp8,fp8,0,0.11246400078137715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,4,4,128,1,fp8,fp8,0,0.12338133653004964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,4,2,128,1,float16,float16,0,0.1275040010611216
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,4,2,128,1,fp8,fp8,0,0.11756799618403117
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,512,4,2,128,1,float16,fp8,0,0.12483200430870056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,4,1,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,4,4,128,1,float16,fp8,0,0.07416533430417378
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,4,4,128,1,fp8,fp8,0,0.07105599840482076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,4,4,128,1,float16,float16,0,0.07704533139864604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,4,1,128,1,fp8,fp8,0,0.062368000547091164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,4,2,128,1,float16,float16,0,0.06853333115577698
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,4,1,128,1,float16,fp8,0,0.06645866731802623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,4,2,128,1,float16,fp8,0,0.06820266445477803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,4,4,128,1,float16,float16,0,0.044165333112080894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,512,4,2,128,1,fp8,fp8,0,0.0662720004717509
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,4,4,128,1,float16,fp8,0,0.043194666504859924
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,4,1,128,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,4,1,128,1,float16,float16,0,0.03956266740957896
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,4,1,128,1,fp8,fp8,0,0.036805334190527596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,4,4,128,1,fp8,fp8,0,0.04189866781234741
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,4,2,128,1,float16,float16,0,0.039893334110577904
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,4,2,128,1,float16,fp8,0,0.04009599983692169
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,512,4,2,128,1,fp8,fp8,0,0.037802666425704956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,4,1,128,1,float16,float16,0,0.02897600084543228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,4,4,128,1,float16,fp8,0,0.03051200012365977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,4,4,128,1,float16,float16,0,0.030661332110563915
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,4,4,128,1,fp8,fp8,0,0.028602667152881622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,4,2,128,1,float16,float16,0,0.028890666862328846
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,4,2,128,1,float16,fp8,0,0.02882666637500127
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,4,2,128,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,4,4,128,1,fp8,fp8,0,0.0244159996509552
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,4,4,128,1,float16,fp8,0,0.025578667720158894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,4,1,128,1,fp8,fp8,0,0.027850667635599773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,512,4,1,128,1,float16,fp8,0,0.028325334191322327
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,4,4,128,1,float16,float16,0,0.025194667279720306
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,4,1,128,1,float16,float16,0,0.02510933329661687
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,4,1,128,1,fp8,fp8,0,0.023685333629449207
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,4,1,128,1,float16,fp8,0,0.025573333104451496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,4,2,128,1,float16,float16,0,0.02569066733121872
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,4,2,128,1,float16,fp8,0,0.02566933383544286
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,4,4,128,1,float16,fp8,0,0.023898666103680927
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,4,1,128,1,float16,float16,0,0.024293333292007446
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,4,1,128,1,fp8,fp8,0,0.02254933367172877
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,4,4,128,1,fp8,fp8,0,0.02221333235502243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,4,1,128,1,float16,fp8,0,0.024160000185171764
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,4,4,128,1,float16,float16,0,0.02367999901374181
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,512,4,2,128,1,fp8,fp8,0,0.023951999843120575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,4,2,128,1,float16,float16,0,0.024501333634058636
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,4,2,128,1,float16,fp8,0,0.02456533412138621
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,4,4,128,1,float16,float16,0,0.022815999885400135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,512,4,2,128,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,4,4,128,1,fp8,fp8,0,0.02181333303451538
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,4,4,128,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,4,1,128,1,fp8,fp8,0,0.02235200007756551
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,4,1,128,1,float16,float16,0,0.024010665714740753
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,4,1,128,1,float16,fp8,0,0.023797333240509033
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,4,2,128,1,float16,fp8,0,0.023760000864664715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,4,2,128,1,float16,float16,0,0.02390933285156886
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,512,4,2,128,1,fp8,fp8,0,0.022634667654832203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,4,1,128,1,float16,fp8,0,0.18834133942921957
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,4,1,128,1,fp8,fp8,0,0.18297600746154785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,4,1,128,1,float16,float16,0,0.18972265720367432
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,4,4,128,1,float16,float16,0,0.12204266587893169
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,4,2,128,1,fp8,fp8,0,0.19615999857584634
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,4,2,128,1,float16,float16,0,0.19835732380549112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,256,4,2,128,1,float16,fp8,0,0.19404800732930502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,4,4,128,1,float16,fp8,0,0.11973866820335388
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,4,1,128,1,float16,float16,0,0.10482666889826457
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,4,1,128,1,float16,fp8,0,0.10449066758155823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,4,4,128,1,fp8,fp8,0,0.11236266295115153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,4,1,128,1,fp8,fp8,0,0.10181867082913716
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,4,2,128,1,float16,float16,0,0.11040533582369487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,4,2,128,1,float16,fp8,0,0.10754666725794475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,4,4,128,1,float16,float16,0,0.07207466661930084
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,4,4,128,1,float16,fp8,0,0.0698773314555486
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,4,1,128,1,float16,float16,0,0.05835199852784475
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,4,4,128,1,fp8,fp8,0,0.06550933420658112
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,4,1,128,1,float16,fp8,0,0.059194669127464294
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,256,4,2,128,1,fp8,fp8,0,0.10545600454012553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,4,1,128,1,fp8,fp8,0,0.05726933479309082
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,4,2,128,1,float16,fp8,0,0.06148266792297363
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,4,4,128,1,float16,float16,0,0.04095466683308283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,4,2,128,1,fp8,fp8,0,0.061861331264177956
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,4,4,128,1,float16,fp8,0,0.040933333337306976
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,256,4,2,128,1,float16,float16,0,0.061834668119748436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,4,4,128,1,fp8,fp8,0,0.03922666609287262
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,4,1,128,1,float16,float16,0,0.036133334040641785
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,4,1,128,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,4,2,128,1,float16,float16,0,0.03704000016053518
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,4,1,128,1,fp8,fp8,0,0.034117333590984344
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,4,1,128,1,float16,float16,0,0.02609066665172577
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,4,2,128,1,fp8,fp8,0,0.035386666655540466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,256,4,2,128,1,float16,fp8,0,0.03707200040419897
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,4,4,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,4,4,128,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,4,4,128,1,float16,float16,0,0.02743999908367793
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,4,1,128,1,float16,fp8,0,0.025936000049114227
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,4,2,128,1,float16,float16,0,0.02644266684850057
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,4,2,128,1,float16,fp8,0,0.02622399975856145
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,4,1,128,1,fp8,fp8,0,0.02363733450571696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,256,4,2,128,1,fp8,fp8,0,0.024421334266662598
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,4,4,128,1,float16,fp8,0,0.022815999885400135
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,4,4,128,1,float16,float16,0,0.022330666581789654
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,4,4,128,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,4,1,128,1,float16,float16,0,0.02179733415444692
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,4,1,128,1,float16,fp8,0,0.022309333086013794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,4,1,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,4,2,128,1,float16,fp8,0,0.02248000105222066
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,4,2,128,1,float16,float16,0,0.022287999590237934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,4,4,128,1,float16,float16,0,0.0201706662774086
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,256,4,2,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,4,4,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,4,4,128,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,4,1,128,1,float16,float16,0,0.020725333442290623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,4,1,128,1,float16,fp8,0,0.020655999581019085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,4,1,128,1,fp8,fp8,0,0.019578666736682255
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,4,2,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,4,4,128,1,float16,float16,0,0.019658666104078293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,4,2,128,1,float16,float16,0,0.02089066555102666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,256,4,2,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,4,4,128,1,fp8,fp8,0,0.018725333114465077
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,4,4,128,1,float16,fp8,0,0.01985599969824155
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,4,1,128,1,float16,float16,0,0.020026666422684986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,4,1,128,1,float16,fp8,0,0.02089066555102666
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,4,1,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,4,2,128,1,float16,float16,0,0.019802667200565338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,4,4,128,1,float16,float16,0,0.01926933353145917
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,4,2,128,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,256,4,2,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,4,4,128,1,fp8,fp8,0,0.017984000345071156
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,4,1,128,1,float16,float16,0,0.019941333681344986
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,4,4,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,4,1,128,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,4,1,128,1,float16,fp8,0,0.019823999454577763
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,4,2,128,1,float16,float16,0,0.019754666835069656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,4,2,128,1,fp8,fp8,0,0.018485333770513535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,4,1,128,1,float16,fp8,0,0.10008000334103902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,256,4,2,128,1,float16,fp8,0,0.019839999576409657
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,4,1,128,1,float16,float16,0,0.09993066390355428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,4,1,128,1,fp8,fp8,0,0.10086933771769206
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,4,2,128,1,float16,float16,0,0.1053013304869334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,4,2,128,1,float16,fp8,0,0.10393066207567851
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,4,1,128,1,float16,float16,0,0.05640000104904175
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,128,4,2,128,1,fp8,fp8,0,0.10568533341089885
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,4,1,128,1,float16,fp8,0,0.056736002365748085
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,4,4,128,1,float16,fp8,0,0.06619733572006226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,4,4,128,1,float16,float16,0,0.06736533343791962
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,4,1,128,1,fp8,fp8,0,0.057573333382606506
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,4,2,128,1,float16,float16,0,0.05965333183606466
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,4,4,128,1,fp8,fp8,0,0.06504533191521962
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,4,2,128,1,fp8,fp8,0,0.060565332571665444
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,128,4,2,128,1,float16,fp8,0,0.058677335580190025
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,4,4,128,1,float16,fp8,0,0.03849600007136663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,4,1,128,1,float16,float16,0,0.03488533447186152
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,4,1,128,1,float16,fp8,0,0.03494933247566223
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,4,4,128,1,float16,float16,0,0.039317332208156586
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,4,4,128,1,fp8,fp8,0,0.03910933434963226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,4,1,128,1,fp8,fp8,0,0.03389866650104523
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,4,2,128,1,float16,float16,0,0.035690667728583016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,4,4,128,1,float16,fp8,0,0.02651199946800868
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,4,2,128,1,float16,fp8,0,0.035455999275048576
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,128,4,2,128,1,fp8,fp8,0,0.035642666121323906
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,4,4,128,1,float16,float16,0,0.02646933247645696
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,4,4,128,1,fp8,fp8,0,0.02496533344189326
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,4,1,128,1,fp8,fp8,0,0.02382933348417282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,4,2,128,1,float16,float16,0,0.025301332275072735
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,4,1,128,1,float16,fp8,0,0.024800000091393787
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,4,1,128,1,float16,float16,0,0.025125332176685333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,4,4,128,1,float16,float16,0,0.020773333807786305
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,4,2,128,1,fp8,fp8,0,0.024288001159826916
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,4,4,128,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,128,4,2,128,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,4,1,128,1,float16,float16,0,0.020421333611011505
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,4,4,128,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,4,1,128,1,float16,fp8,0,0.020746666938066483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,4,2,128,1,float16,float16,0,0.020453333854675293
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,4,1,128,1,fp8,fp8,0,0.02088533341884613
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,4,2,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,128,4,2,128,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,4,1,128,1,float16,float16,0,0.019445333629846573
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,4,4,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,4,4,128,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,4,1,128,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,4,4,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,4,2,128,1,float16,float16,0,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,4,4,128,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,4,2,128,1,float16,fp8,0,0.019845332950353622
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,4,4,128,1,float16,float16,0,0.01836799954374631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,4,1,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,128,4,2,128,1,fp8,fp8,0,0.022634667654832203
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,4,4,128,1,fp8,fp8,0,0.018613333503405254
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,4,1,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,4,1,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,4,4,128,1,float16,fp8,0,0.01838933303952217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,4,2,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,4,2,128,1,float16,float16,0,0.018645333747069042
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,4,1,128,1,float16,float16,0,0.018394666413466137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,4,4,128,1,fp8,fp8,0,0.018277333428462345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,128,4,2,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,4,4,128,1,float16,float16,0,0.01970133309563001
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,4,1,128,1,float16,float16,0,0.01834133391578992
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,4,1,128,1,float16,fp8,0,0.0183999997874101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,4,1,128,1,fp8,fp8,0,0.018405333161354065
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,4,2,128,1,float16,float16,0,0.017925333231687546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,4,2,128,1,float16,fp8,0,0.018437333405017853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,128,4,2,128,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,4,4,128,1,fp8,fp8,0,0.01855466639002164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,4,1,128,1,float16,float16,0,0.0183999997874101
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,4,4,128,1,float16,float16,0,0.017594666530688603
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,4,4,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,4,1,128,1,float16,fp8,0,0.018351999421914417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,4,2,128,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,4,2,128,1,float16,float16,0,0.017893332988023758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,4,1,128,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,128,4,2,128,1,fp8,fp8,0,0.018570666511853535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,4,1,128,1,float16,float16,0,0.06520000100135803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,4,2,128,1,float16,float16,0,0.06811733543872833
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,4,1,128,1,float16,fp8,0,0.06555200119813283
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,4,2,128,1,float16,fp8,0,0.06717333197593689
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,4,1,128,1,fp8,fp8,0,0.07192533214886983
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,64,4,2,128,1,fp8,fp8,0,0.07496533294518788
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,4,4,128,1,float16,fp8,0,0.0393653338154157
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,4,4,128,1,float16,float16,0,0.040565334260463715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,4,4,128,1,fp8,fp8,0,0.046709333856900535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,4,1,128,1,float16,float16,0,0.03819733361403147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,4,1,128,1,fp8,fp8,0,0.04186666508515676
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,4,2,128,1,float16,fp8,0,0.03912533322970072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,4,1,128,1,float16,fp8,0,0.03886399914820989
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,4,4,128,1,float16,fp8,0,0.02701866626739502
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,4,2,128,1,fp8,fp8,0,0.0440586656332016
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,4,4,128,1,float16,float16,0,0.026906666656335194
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,64,4,2,128,1,float16,float16,0,0.03896533449490865
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,4,4,128,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,4,1,128,1,float16,fp8,0,0.026288000245889027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,4,1,128,1,float16,float16,0,0.026250667870044708
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,4,2,128,1,float16,float16,0,0.026346666117509205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,4,2,128,1,float16,fp8,0,0.02666666607062022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,4,2,128,1,fp8,fp8,0,0.028245332340399425
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,64,4,1,128,1,fp8,fp8,0,0.027776000400384266
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,4,4,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,4,4,128,1,float16,float16,0,0.020725333442290623
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,4,1,128,1,float16,float16,0,0.020282667130231857
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,4,1,128,1,float16,fp8,0,0.02000533292690913
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,4,4,128,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,4,1,128,1,fp8,fp8,0,0.020831999679406483
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,4,2,128,1,float16,float16,0,0.02013333390156428
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,4,2,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,4,4,128,1,float16,fp8,0,0.018618666877349217
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,4,4,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,64,4,2,128,1,float16,fp8,0,0.02038399999340375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,4,1,128,1,float16,fp8,0,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,4,1,128,1,float16,float16,0,0.018298666924238205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,4,4,128,1,float16,float16,0,0.017893332988023758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,4,2,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,4,2,128,1,float16,float16,0,0.018394666413466137
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,4,2,128,1,fp8,fp8,0,0.01960533360640208
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,64,4,1,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,4,4,128,1,float16,fp8,0,0.017658667018016178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,4,4,128,1,fp8,fp8,0,0.018751999984184902
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,4,4,128,1,float16,float16,0,0.017498667041460674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,4,1,128,1,float16,fp8,0,0.018298666924238205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,4,2,128,1,float16,float16,0,0.017375999440749485
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,4,1,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,4,2,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,4,1,128,1,float16,float16,0,0.017850667238235474
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,4,4,128,1,float16,float16,0,0.016549333930015564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,64,4,2,128,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,4,4,128,1,fp8,fp8,0,0.01828266680240631
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,4,1,128,1,float16,float16,0,0.017263999829689663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,4,4,128,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,4,1,128,1,float16,fp8,0,0.017759999881188076
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,4,1,128,1,fp8,fp8,0,0.018677332748969395
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,4,2,128,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,4,2,128,1,fp8,fp8,0,0.018207999567190807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,64,4,2,128,1,float16,float16,0,0.01738133281469345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,4,4,128,1,float16,float16,0,0.016762666404247284
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,4,4,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,4,1,128,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,4,1,128,1,float16,float16,0,0.016741332908471424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,4,1,128,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,4,4,128,1,fp8,fp8,0,0.017685333887736004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,4,2,128,1,fp8,fp8,0,0.018042666216691334
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,4,4,128,1,float16,float16,0,0.016261332978804905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,4,4,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,4,2,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,4,4,128,1,fp8,fp8,0,0.017802666872739792
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,64,4,2,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,4,1,128,1,float16,float16,0,0.016469333320856094
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,4,1,128,1,fp8,fp8,0,0.018144000321626663
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,4,1,128,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,4,2,128,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,4,2,128,1,float16,float16,0,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,4,1,128,1,float16,float16,0,0.046485334634780884
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,64,4,2,128,1,fp8,fp8,0,0.018485333770513535
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,4,1,128,1,fp8,fp8,0,0.05794133245944977
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,4,1,128,1,float16,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,4,2,128,1,float16,float16,0,0.04765866696834564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,4,2,128,1,fp8,fp8,0,0.05972266693909963
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,4,4,128,1,float16,float16,0,0.03017599880695343
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,32,4,2,128,1,float16,fp8,0,0.04713066418965658
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,4,4,128,1,float16,fp8,0,0.03070399910211563
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,4,1,128,1,float16,float16,0,0.03014933317899704
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,4,4,128,1,fp8,fp8,0,0.03691199918588003
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,4,1,128,1,float16,fp8,0,0.029690665503342945
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,4,1,128,1,fp8,fp8,0,0.0364479993780454
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,4,2,128,1,float16,float16,0,0.03052799900372823
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,4,2,128,1,float16,fp8,0,0.030447999636332195
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,4,4,128,1,float16,float16,0,0.02242133269707362
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,4,4,128,1,float16,fp8,0,0.022117334107557934
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,32,4,2,128,1,fp8,fp8,0,0.03661333272854487
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,4,1,128,1,float16,fp8,0,0.02204799900452296
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,4,1,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,4,1,128,1,float16,float16,0,0.021722666919231415
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,4,4,128,1,fp8,fp8,0,0.025125332176685333
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,4,2,128,1,float16,float16,0,0.022154666483402252
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,4,2,128,1,float16,fp8,0,0.022341333329677582
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,32,4,2,128,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,4,4,128,1,float16,fp8,0,0.01860800012946129
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,4,4,128,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,4,1,128,1,float16,fp8,0,0.018021332720915478
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,4,4,128,1,float16,float16,0,0.018079999834299088
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,4,1,128,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,4,2,128,1,float16,fp8,0,0.0185759998857975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,4,2,128,1,float16,float16,0,0.01798933371901512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,4,1,128,1,float16,float16,0,0.017637333522240322
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,32,4,2,128,1,fp8,fp8,0,0.020224000016848247
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,4,4,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,4,4,128,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,4,1,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,4,1,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,4,1,128,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,4,4,128,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,4,2,128,1,float16,fp8,0,0.017765333255132038
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,4,2,128,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,4,4,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,4,4,128,1,float16,float16,0,0.016314666718244553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,32,4,2,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,4,4,128,1,fp8,fp8,0,0.017797333498795826
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,4,1,128,1,float16,float16,0,0.01676799977819125
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,4,1,128,1,fp8,fp8,0,0.0185759998857975
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,4,2,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,4,1,128,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,4,2,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,32,4,2,128,1,fp8,fp8,0,0.018378666291634243
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,4,4,128,1,float16,fp8,0,0.01590399940808614
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,4,4,128,1,fp8,fp8,0,0.018058666338523228
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,4,1,128,1,float16,float16,0,0.016501333564519882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,4,1,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,4,4,128,1,float16,float16,0,0.015578666081031164
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,4,2,128,1,float16,float16,0,0.016442666451136272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,4,2,128,1,fp8,fp8,0,0.017893332988023758
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,4,2,128,1,float16,fp8,0,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,4,4,128,1,float16,float16,0,0.01544533297419548
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,32,4,1,128,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,4,4,128,1,float16,fp8,0,0.016149333367745083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,4,4,128,1,fp8,fp8,0,0.01798933371901512
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,4,1,128,1,float16,fp8,0,0.016261332978804905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,4,1,128,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,4,2,128,1,float16,fp8,0,0.016480000068744022
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,4,2,128,1,float16,float16,0,0.016074666132529575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,4,4,128,1,float16,float16,0,0.01562133307258288
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,4,4,128,1,float16,fp8,0,0.016085332880417507
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,4,1,128,1,float16,float16,0,0.016197333733240765
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,32,4,2,128,1,fp8,fp8,0,0.018197332819302876
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,4,4,128,1,fp8,fp8,0,0.017583999782800674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,4,1,128,1,float16,float16,0,0.015935999651749928
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,4,1,128,1,float16,fp8,0,0.01646399994691213
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,4,1,128,1,fp8,fp8,0,0.01803733284274737
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,4,2,128,1,float16,float16,0,0.016442666451136272
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,4,2,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,4,1,128,1,float16,fp8,0,0.04155199974775314
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,32,4,2,128,1,fp8,fp8,0,0.018207999567190807
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,4,1,128,1,fp8,fp8,0,0.05188799897829691
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,4,1,128,1,float16,float16,0,0.04155733436346054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,4,2,128,1,float16,float16,0,0.04251733422279358
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,4,2,128,1,float16,fp8,0,0.04197866717974345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,256,16,4,2,128,1,fp8,fp8,0,0.05299200117588043
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,4,4,128,1,float16,float16,0,0.0271573339899381
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,4,4,128,1,float16,fp8,0,0.026752000053723652
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,4,1,128,1,float16,float16,0,0.027962667246659596
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,4,1,128,1,fp8,fp8,0,0.03341866781314214
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,4,2,128,1,float16,float16,0,0.02775999903678894
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,4,2,128,1,float16,fp8,0,0.027999999622503918
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,4,2,128,1,fp8,fp8,0,0.033861334125200905
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,4,4,128,1,fp8,fp8,0,0.03325333446264267
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,128,16,4,1,128,1,float16,fp8,0,0.02812266598145167
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,4,4,128,1,float16,float16,0,0.02029866725206375
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,4,4,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,4,4,128,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,4,1,128,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,4,1,128,1,fp8,fp8,0,0.02409599969784419
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,4,1,128,1,float16,float16,0,0.02060266708334287
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,4,2,128,1,float16,float16,0,0.02075200031201045
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,4,2,128,1,fp8,fp8,0,0.023818666736284893
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,4,4,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,64,16,4,2,128,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,4,4,128,1,float16,fp8,0,0.017717332889636356
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,4,4,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,4,1,128,1,float16,float16,0,0.017583999782800674
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,4,1,128,1,float16,fp8,0,0.017781333376963932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,4,2,128,1,float16,float16,0,0.017360000560681026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,4,2,128,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,4,1,128,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,4,4,128,1,float16,float16,0,0.016517333686351776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,4,4,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,4,4,128,1,fp8,fp8,0,0.018346666047970455
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,32,16,4,2,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,4,1,128,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,4,1,128,1,fp8,fp8,0,0.018122666825850803
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,4,1,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,4,2,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,4,2,128,1,fp8,fp8,0,0.018565333137909572
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,4,4,128,1,float16,float16,0,0.01605333387851715
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,16,16,4,2,128,1,float16,float16,0,0.016517333686351776
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,4,1,128,1,float16,float16,0,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,4,4,128,1,fp8,fp8,0,0.01809599995613098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,4,1,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,4,4,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,4,2,128,1,float16,float16,0,0.016682667036851246
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,4,2,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,4,1,128,1,fp8,fp8,0,0.01820266619324684
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,4,4,128,1,float16,float16,0,0.015583999454975128
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,4,4,128,1,float16,fp8,0,0.01598400001724561
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,8,16,4,2,128,1,fp8,fp8,0,0.01802666609485944
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,4,4,128,1,fp8,fp8,0,0.017658667018016178
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,4,1,128,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,4,1,128,1,float16,float16,0,0.016314666718244553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,4,1,128,1,fp8,fp8,0,0.01801066721479098
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,4,2,128,1,float16,fp8,0,0.016549333930015564
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,4,2,128,1,float16,float16,0,0.016501333564519882
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,4,16,4,2,128,1,fp8,fp8,0,0.017925333231687546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,4,4,128,1,float16,fp8,0,0.015589332828919092
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,4,4,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,4,1,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,4,4,128,1,fp8,fp8,0,0.017840000490347546
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,4,2,128,1,float16,fp8,0,0.016074666132529575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,4,1,128,1,float16,fp8,0,0.016314666718244553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,4,2,128,1,float16,float16,0,0.01609066625436147
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,4,1,128,1,float16,float16,0,0.016074666132529575
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,2,16,4,2,128,1,fp8,fp8,0,0.018245333184798557
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,4,4,128,1,float16,fp8,0,0.015824000040690105
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,4,1,128,1,float16,float16,0,0.015765332927306492
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,4,4,128,1,fp8,fp8,0,0.017786666750907898
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,4,1,128,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,4,4,128,1,float16,float16,0,0.021429332594076794
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,4,1,128,1,fp8,fp8,0,0.017845333864291508
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,4,2,128,1,float16,fp8,0,0.016314666718244553
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,4,2,128,1,float16,float16,0,0.016389333953460056
TRTLLM,1.0.0rc3,NVIDIA H200,context_attention,torch_flow,1,16,4,2,128,1,fp8,fp8,0,0.018031999468803406
